diff options
Diffstat (limited to 'src/usr/diag/prdf/common/plat')
75 files changed, 5811 insertions, 1323 deletions
diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mc.rule b/src/usr/diag/prdf/common/plat/axone/axone_mc.rule index 4f63011fc..f23fee7d2 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mc.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -141,39 +141,39 @@ chip axone_mc }; ############################################################################ - # P9 MC target MCBISTFIR + # P9 MC target MCMISCFIR ############################################################################ - register MCBISTFIR + register MCMISCFIR { - name "P9 MC target MCBISTFIR"; + name "P9 MC target MCMISCFIR"; scomaddr 0x07012300; reset (&, 0x07012301); mask (|, 0x07012305); capture group default; }; - register MCBISTFIR_MASK + register MCMISCFIR_MASK { - name "P9 MC target MCBISTFIR MASK"; + name "P9 MC target MCMISCFIR MASK"; scomaddr 0x07012303; capture group default; }; - register MCBISTFIR_ACT0 + register MCMISCFIR_ACT0 { - name "P9 MC target MCBISTFIR ACT0"; + name "P9 MC target MCMISCFIR ACT0"; scomaddr 0x07012306; capture group default; - capture req nonzero("MCBISTFIR"); + capture req nonzero("MCMISCFIR"); }; - register MCBISTFIR_ACT1 + register MCMISCFIR_ACT1 { - name "P9 MC target MCBISTFIR ACT1"; + name "P9 MC target MCMISCFIR ACT1"; scomaddr 0x07012307; capture group default; - capture req nonzero("MCBISTFIR"); + capture req nonzero("MCMISCFIR"); }; # Include registers not defined by the xml @@ -253,9 +253,9 @@ group gMC_CHIPLET_FIR attntype CHECK_STOP, RECOVERABLE (rMC_CHIPLET_FIR, bit(11)) ? analyzeConnectedMCC3; /** MC_CHIPLET_FIR[12] - * Attention from MCBISTFIR + * Attention from MCMISCFIR */ - (rMC_CHIPLET_FIR, bit(12)) ? analyzeMCBISTFIR; + (rMC_CHIPLET_FIR, bit(12)) ? 
analyzeMCMISCFIR; /** MC_CHIPLET_FIR[13] * Attention from IOOMIFIR 0 @@ -358,9 +358,9 @@ group gMC_CHIPLET_UCS_FIR attntype UNIT_CS (rMC_CHIPLET_UCS_FIR, bit(8)) ? analyzeConnectedMCC3; /** MC_CHIPLET_UCS_FIR[9] - * Attention from MCBISTFIR + * Attention from MCMISCFIR */ - (rMC_CHIPLET_UCS_FIR, bit(9)) ? analyzeMCBISTFIR; + (rMC_CHIPLET_UCS_FIR, bit(9)) ? analyzeMCMISCFIR; /** MC_CHIPLET_UCS_FIR[10] * Attention from IOOMIFIR 0 @@ -448,9 +448,9 @@ group gMC_CHIPLET_HA_FIR attntype HOST_ATTN (rMC_CHIPLET_HA_FIR, bit(8)) ? analyzeConnectedMCC3; /** MC_CHIPLET_HA_FIR[9] - * Attention from MCBISTFIR + * Attention from MCMISCFIR */ - (rMC_CHIPLET_HA_FIR, bit(9)) ? analyzeMCBISTFIR; + (rMC_CHIPLET_HA_FIR, bit(9)) ? analyzeMCMISCFIR; }; @@ -563,94 +563,94 @@ group gMC_LFIR }; ################################################################################ -# P9 MC target MCBISTFIR +# P9 MC target MCMISCFIR ################################################################################ -rule rMCBISTFIR +rule rMCMISCFIR { CHECK_STOP: - MCBISTFIR & ~MCBISTFIR_MASK & ~MCBISTFIR_ACT0 & ~MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & ~MCMISCFIR_ACT0 & ~MCMISCFIR_ACT1; RECOVERABLE: - MCBISTFIR & ~MCBISTFIR_MASK & ~MCBISTFIR_ACT0 & MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & ~MCMISCFIR_ACT0 & MCMISCFIR_ACT1; HOST_ATTN: - MCBISTFIR & ~MCBISTFIR_MASK & MCBISTFIR_ACT0 & ~MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & MCMISCFIR_ACT0 & ~MCMISCFIR_ACT1; UNIT_CS: - MCBISTFIR & ~MCBISTFIR_MASK & MCBISTFIR_ACT0 & MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & MCMISCFIR_ACT0 & MCMISCFIR_ACT1; }; -group gMCBISTFIR +group gMCMISCFIR filter singlebit, cs_root_cause { - /** MCBISTFIR[0] + /** MCMISCFIR[0] * WAT debug bus attn */ - (rMCBISTFIR, bit(0)) ? defaultMaskedError; + (rMCMISCFIR, bit(0)) ? defaultMaskedError; - /** MCBISTFIR[1] + /** MCMISCFIR[1] * WAT debug register parity error */ - (rMCBISTFIR, bit(1)) ? defaultMaskedError; + (rMCMISCFIR, bit(1)) ? 
defaultMaskedError; - /** MCBISTFIR[2] + /** MCMISCFIR[2] * SCOM recoverable register parity error */ - (rMCBISTFIR, bit(2)) ? defaultMaskedError; + (rMCMISCFIR, bit(2)) ? self_th_1; - /** MCBISTFIR[3] + /** MCMISCFIR[3] * Spare */ - (rMCBISTFIR, bit(3)) ? defaultMaskedError; + (rMCMISCFIR, bit(3)) ? defaultMaskedError; - /** MCBISTFIR[4] + /** MCMISCFIR[4] * Chan 0A application interrupt */ - (rMCBISTFIR, bit(4)) ? defaultMaskedError; + (rMCMISCFIR, bit(4)) ? defaultMaskedError; - /** MCBISTFIR[5] + /** MCMISCFIR[5] * Chan 0B application interrupt */ - (rMCBISTFIR, bit(5)) ? defaultMaskedError; + (rMCMISCFIR, bit(5)) ? defaultMaskedError; - /** MCBISTFIR[6] + /** MCMISCFIR[6] * Chan 1A application interrupt */ - (rMCBISTFIR, bit(6)) ? defaultMaskedError; + (rMCMISCFIR, bit(6)) ? defaultMaskedError; - /** MCBISTFIR[7] + /** MCMISCFIR[7] * Chan 1B application interrupt */ - (rMCBISTFIR, bit(7)) ? defaultMaskedError; + (rMCMISCFIR, bit(7)) ? defaultMaskedError; - /** MCBISTFIR[8] + /** MCMISCFIR[8] * Chan 2A application interrupt */ - (rMCBISTFIR, bit(8)) ? defaultMaskedError; + (rMCMISCFIR, bit(8)) ? defaultMaskedError; - /** MCBISTFIR[9] + /** MCMISCFIR[9] * Chan 2B application interrupt */ - (rMCBISTFIR, bit(9)) ? defaultMaskedError; + (rMCMISCFIR, bit(9)) ? defaultMaskedError; - /** MCBISTFIR[10] + /** MCMISCFIR[10] * Chan 3A application interrupt */ - (rMCBISTFIR, bit(10)) ? defaultMaskedError; + (rMCMISCFIR, bit(10)) ? defaultMaskedError; - /** MCBISTFIR[11] + /** MCMISCFIR[11] * Chan 3B application interrupt */ - (rMCBISTFIR, bit(11)) ? defaultMaskedError; + (rMCMISCFIR, bit(11)) ? defaultMaskedError; - /** MCBISTFIR[12] + /** MCMISCFIR[12] * Internal SCOM error */ - (rMCBISTFIR, bit(12)) ? defaultMaskedError; + (rMCMISCFIR, bit(12)) ? defaultMaskedError; - /** MCBISTFIR[13] + /** MCMISCFIR[13] * Internal SCOM error clone */ - (rMCBISTFIR, bit(13)) ? defaultMaskedError; + (rMCMISCFIR, bit(13)) ? 
defaultMaskedError; }; diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule b/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule index aab2297ef..7c639bf5e 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2017,2018 +# Contributors Listed Below - COPYRIGHT 2017,2019 # [+] International Business Machines Corp. # # @@ -28,7 +28,7 @@ ############################################################################### actionclass analyzeMC_LFIR { analyze(gMC_LFIR); }; -actionclass analyzeMCBISTFIR { analyze(gMCBISTFIR); }; +actionclass analyzeMCMISCFIR { analyze(gMCMISCFIR); }; ############################################################################### # Analyze connected diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule new file mode 100644 index 000000000..150e6895a --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule @@ -0,0 +1,47 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# IBM_PROLOG_END_TAG + +################################################################################ +# Additional registers for MC target, not defined in XML +################################################################################ + + ############################################################################ + # PCB Slave Error Regs + ############################################################################ + + register MC_ERROR_REG + { + name "MC PCB Slave error reg"; + scomaddr 0x070F001F; + capture group PllFIRs; + }; + + register MC_CONFIG_REG + { + name "MC PCB Slave config reg"; + scomaddr 0x070F001E; + capture group PllFIRs; + }; + diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule b/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule index bf632abbb..31f663c77 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -180,22 +180,22 @@ rule rDSTLFIR group gDSTLFIR filter singlebit, - cs_root_cause + cs_root_cause(0,4) { /** DSTLFIR[0] * AFU initiated Checkstop on Subchannel A */ - (rDSTLFIR, bit(0)) ? defaultMaskedError; + (rDSTLFIR, bit(0)) ? analyze_ocmb_chnl0_UERE; /** DSTLFIR[1] * AFU initiated Recoverable Attn on Subchannel A */ - (rDSTLFIR, bit(1)) ? defaultMaskedError; + (rDSTLFIR, bit(1)) ? analyze_ocmb_chnl0; /** DSTLFIR[2] * AFU initiated Special Attn on Subchannel A */ - (rDSTLFIR, bit(2)) ? defaultMaskedError; + (rDSTLFIR, bit(2)) ? analyze_ocmb_chnl0; /** DSTLFIR[3] * AFU initiated Application Interrupt Attn on Subchannel A @@ -205,17 +205,17 @@ group gDSTLFIR /** DSTLFIR[4] * AFU initiated Checkstop on Subchannel B */ - (rDSTLFIR, bit(4)) ? defaultMaskedError; + (rDSTLFIR, bit(4)) ? 
analyze_ocmb_chnl1_UERE; /** DSTLFIR[5] * AFU initiated Recoverable Attn on Subchannel B */ - (rDSTLFIR, bit(5)) ? defaultMaskedError; + (rDSTLFIR, bit(5)) ? analyze_ocmb_chnl1; /** DSTLFIR[6] * AFU initiated Special Attn on Subchannel B */ - (rDSTLFIR, bit(6)) ? defaultMaskedError; + (rDSTLFIR, bit(6)) ? analyze_ocmb_chnl1; /** DSTLFIR[7] * AFU initiated Application Interrupt Attn on Subchannel B @@ -225,52 +225,52 @@ group gDSTLFIR /** DSTLFIR[8] * Async crossing parity error */ - (rDSTLFIR, bit(8)) ? defaultMaskedError; + (rDSTLFIR, bit(8)) ? self_th_1; /** DSTLFIR[9] * Async crossing sequence error */ - (rDSTLFIR, bit(9)) ? defaultMaskedError; + (rDSTLFIR, bit(9)) ? self_th_1; /** DSTLFIR[10] * Config reg recoverable parity error */ - (rDSTLFIR, bit(10)) ? defaultMaskedError; + (rDSTLFIR, bit(10)) ? self_th_1; /** DSTLFIR[11] * Config reg fatal parity error */ - (rDSTLFIR, bit(11)) ? defaultMaskedError; + (rDSTLFIR, bit(11)) ? self_th_1; /** DSTLFIR[12] * Subchannel A counter error */ - (rDSTLFIR, bit(12)) ? defaultMaskedError; + (rDSTLFIR, bit(12)) ? chnl0_omi_bus_th_1; /** DSTLFIR[13] * Subchannel B counter error */ - (rDSTLFIR, bit(13)) ? defaultMaskedError; + (rDSTLFIR, bit(13)) ? chnl1_omi_bus_th_1; /** DSTLFIR[14] * Subchannel A timeout error */ - (rDSTLFIR, bit(14)) ? defaultMaskedError; + (rDSTLFIR, bit(14)) ? chnl0_omi_bus_th_32_perDay; /** DSTLFIR[15] * Subchannel B timeout error */ - (rDSTLFIR, bit(15)) ? defaultMaskedError; + (rDSTLFIR, bit(15)) ? chnl1_omi_bus_th_32_perDay; /** DSTLFIR[16] * Subchannel A buffer overuse error */ - (rDSTLFIR, bit(16)) ? defaultMaskedError; + (rDSTLFIR, bit(16)) ? chnl0_ocmb_th_1; /** DSTLFIR[17] * Subchannel B buffer overuse error */ - (rDSTLFIR, bit(17)) ? defaultMaskedError; + (rDSTLFIR, bit(17)) ? chnl1_ocmb_th_1; /** DSTLFIR[18] * Subchannel A DL link down @@ -293,14 +293,29 @@ group gDSTLFIR (rDSTLFIR, bit(21)) ? 
defaultMaskedError; /** DSTLFIR[22] - * Internal SCOM error + * DSTLFIR channel timeout on subch A */ - (rDSTLFIR, bit(22)) ? defaultMaskedError; + (rDSTLFIR, bit(22)) ? chnl0_omi_bus_th_1; /** DSTLFIR[23] - * Internal SCOM error clone + * DSTLFIR channel timeout on subch B + */ + (rDSTLFIR, bit(23)) ? chnl1_omi_bus_th_1; + + /** DSTLFIR[24:25] + * spare + */ + (rDSTLFIR, bit(24|25)) ? defaultMaskedError; + + /** DSTLFIR[26] + * Internal SCOM Error + */ + (rDSTLFIR, bit(26)) ? defaultMaskedError; + + /** DSTLFIR[27] + * Internal SCOM Error Clone */ - (rDSTLFIR, bit(23)) ? defaultMaskedError; + (rDSTLFIR, bit(27)) ? defaultMaskedError; }; @@ -327,22 +342,22 @@ group gUSTLFIR /** USTLFIR[0] * Chan A unexpected data error */ - (rUSTLFIR, bit(0)) ? defaultMaskedError; + (rUSTLFIR, bit(0)) ? chnl0_ocmb_th_1; /** USTLFIR[1] * Chan B unexpected data error */ - (rUSTLFIR, bit(1)) ? defaultMaskedError; + (rUSTLFIR, bit(1)) ? chnl1_ocmb_th_1; /** USTLFIR[2] * Chan A invalid template error */ - (rUSTLFIR, bit(2)) ? defaultMaskedError; + (rUSTLFIR, bit(2)) ? chnl0_ocmb_th_1; /** USTLFIR[3] * Chan B invalid template error */ - (rUSTLFIR, bit(3)) ? defaultMaskedError; + (rUSTLFIR, bit(3)) ? chnl1_ocmb_th_1; /** USTLFIR[4] * Chan A half speed mode @@ -357,12 +372,12 @@ group gUSTLFIR /** USTLFIR[6] * WDF buffer CE */ - (rUSTLFIR, bit(6)) ? defaultMaskedError; + (rUSTLFIR, bit(6)) ? self_th_32perDay; /** USTLFIR[7] * WDF buffer UE */ - (rUSTLFIR, bit(7)) ? defaultMaskedError; + (rUSTLFIR, bit(7)) ? self_th_1; /** USTLFIR[8] * WDF buffer SUE @@ -372,32 +387,32 @@ group gUSTLFIR /** USTLFIR[9] * WDF buffer overrun */ - (rUSTLFIR, bit(9)) ? defaultMaskedError; + (rUSTLFIR, bit(9)) ? self_th_1; /** USTLFIR[10] * WDF tag parity error */ - (rUSTLFIR, bit(10)) ? defaultMaskedError; + (rUSTLFIR, bit(10)) ? self_th_1; /** USTLFIR[11] * WDF scom sequencer error */ - (rUSTLFIR, bit(11)) ? defaultMaskedError; + (rUSTLFIR, bit(11)) ? 
self_th_1; /** USTLFIR[12] * WDF pwctl sequencer error */ - (rUSTLFIR, bit(12)) ? defaultMaskedError; + (rUSTLFIR, bit(12)) ? self_th_1; /** USTLFIR[13] * WDF misc_reg parity error */ - (rUSTLFIR, bit(13)) ? defaultMaskedError; + (rUSTLFIR, bit(13)) ? self_th_1; /** USTLFIR[14] * WDF MCA async error */ - (rUSTLFIR, bit(14)) ? defaultMaskedError; + (rUSTLFIR, bit(14)) ? self_th_1; /** USTLFIR[15] * WDF Data Syndrome NE0 @@ -407,32 +422,32 @@ group gUSTLFIR /** USTLFIR[16] * WDF CMT parity error */ - (rUSTLFIR, bit(16)) ? defaultMaskedError; + (rUSTLFIR, bit(16)) ? self_th_1; /** USTLFIR[17] - * TBD + * spare */ (rUSTLFIR, bit(17)) ? defaultMaskedError; /** USTLFIR[18] - * TBD + * spare */ (rUSTLFIR, bit(18)) ? defaultMaskedError; /** USTLFIR[19] - * TBD + * Read Buffers overflowed/underflowed */ - (rUSTLFIR, bit(19)) ? defaultMaskedError; + (rUSTLFIR, bit(19)) ? all_ocmb_and_mcc_th_1; /** USTLFIR[20] * WRT Buffer CE */ - (rUSTLFIR, bit(20)) ? defaultMaskedError; + (rUSTLFIR, bit(20)) ? parent_proc_th_32perDay; /** USTLFIR[21] * WRT Buffer UE */ - (rUSTLFIR, bit(21)) ? defaultMaskedError; + (rUSTLFIR, bit(21)) ? parent_proc_th_1; /** USTLFIR[22] * WRT Buffer SUE @@ -442,12 +457,12 @@ group gUSTLFIR /** USTLFIR[23] * WRT scom sequencer error */ - (rUSTLFIR, bit(23)) ? defaultMaskedError; + (rUSTLFIR, bit(23)) ? self_th_1; /** USTLFIR[24] * WRT misc_reg parity error */ - (rUSTLFIR, bit(24)) ? defaultMaskedError; + (rUSTLFIR, bit(24)) ? self_th_1; /** USTLFIR[25:26] * WRT error information spares @@ -457,22 +472,22 @@ group gUSTLFIR /** USTLFIR[27] * Chan A fail response checkstop */ - (rUSTLFIR, bit(27)) ? defaultMaskedError; + (rUSTLFIR, bit(27)) ? chnl0_ocmb_th_1; /** USTLFIR[28] * Chan B fail response checkstop */ - (rUSTLFIR, bit(28)) ? defaultMaskedError; + (rUSTLFIR, bit(28)) ? chnl1_ocmb_th_1; /** USTLFIR[29] * Chan A fail response recoverable */ - (rUSTLFIR, bit(29)) ? defaultMaskedError; + (rUSTLFIR, bit(29)) ? 
threshold_and_mask_chnl0_ocmb_th_1; /** USTLFIR[30] * Chan B fail response recoverable */ - (rUSTLFIR, bit(30)) ? defaultMaskedError; + (rUSTLFIR, bit(30)) ? threshold_and_mask_chnl1_ocmb_th_1; /** USTLFIR[31] * Chan A lol drop checkstop @@ -487,72 +502,72 @@ group gUSTLFIR /** USTLFIR[33] * Chan A lol drop recoverable */ - (rUSTLFIR, bit(33)) ? defaultMaskedError; + (rUSTLFIR, bit(33)) ? chnl0_ocmb_H_omi_L_th_1; /** USTLFIR[34] * Chan B lol drop recoverable */ - (rUSTLFIR, bit(34)) ? defaultMaskedError; + (rUSTLFIR, bit(34)) ? chnl1_ocmb_H_omi_L_th_1; /** USTLFIR[35] * Chan A flit parity error */ - (rUSTLFIR, bit(35)) ? defaultMaskedError; + (rUSTLFIR, bit(35)) ? chnl0_omi_th_1; /** USTLFIR[36] * Chan B flit parity error */ - (rUSTLFIR, bit(36)) ? defaultMaskedError; + (rUSTLFIR, bit(36)) ? chnl1_omi_th_1; /** USTLFIR[37] * Chan A fatal parity error */ - (rUSTLFIR, bit(37)) ? defaultMaskedError; + (rUSTLFIR, bit(37)) ? chnl0_omi_th_1; /** USTLFIR[38] * Chan B fatal parity error */ - (rUSTLFIR, bit(38)) ? defaultMaskedError; + (rUSTLFIR, bit(38)) ? chnl1_omi_th_1; /** USTLFIR[39] * Chan A more than 2 data flits for template 9 */ - (rUSTLFIR, bit(39)) ? defaultMaskedError; + (rUSTLFIR, bit(39)) ? chnl0_ocmb_th_1; /** USTLFIR[40] * Chan B more than 2 data flits for template 9 */ - (rUSTLFIR, bit(40)) ? defaultMaskedError; + (rUSTLFIR, bit(40)) ? chnl1_ocmb_th_1; /** USTLFIR[41] * Chan A excess bad data bits */ - (rUSTLFIR, bit(41)) ? defaultMaskedError; + (rUSTLFIR, bit(41)) ? chnl0_ocmb_th_1; /** USTLFIR[42] * Chan B excess bad data bits */ - (rUSTLFIR, bit(42)) ? defaultMaskedError; + (rUSTLFIR, bit(42)) ? chnl1_ocmb_th_1; /** USTLFIR[43] * Chan A memory read data returned in template 0 */ - (rUSTLFIR, bit(43)) ? defaultMaskedError; + (rUSTLFIR, bit(43)) ? chnl0_ocmb_th_1; /** USTLFIR[44] * Chan B memory read data returned in template 0 */ - (rUSTLFIR, bit(44)) ? defaultMaskedError; + (rUSTLFIR, bit(44)) ? 
chnl1_ocmb_th_1; /** USTLFIR[45] * Chan A MMIO in lol mode */ - (rUSTLFIR, bit(45)) ? defaultMaskedError; + (rUSTLFIR, bit(45)) ? chnl0_omi_th_1; /** USTLFIR[46] * Chan B MMIO in lol mode */ - (rUSTLFIR, bit(46)) ? defaultMaskedError; + (rUSTLFIR, bit(46)) ? chnl1_omi_th_1; /** USTLFIR[47] * Chan A bad data @@ -567,62 +582,62 @@ group gUSTLFIR /** USTLFIR[49] * Chan A excess data error */ - (rUSTLFIR, bit(49)) ? defaultMaskedError; + (rUSTLFIR, bit(49)) ? chnl0_ocmb_th_1; /** USTLFIR[50] * Chan B excess data error */ - (rUSTLFIR, bit(50)) ? defaultMaskedError; + (rUSTLFIR, bit(50)) ? chnl1_ocmb_th_1; /** USTLFIR[51] * Chan A Bad CRC data not valid error */ - (rUSTLFIR, bit(51)) ? defaultMaskedError; + (rUSTLFIR, bit(51)) ? chnl0_omi_th_1; /** USTLFIR[52] * Chan B Bad CRC data not valid error */ - (rUSTLFIR, bit(52)) ? defaultMaskedError; + (rUSTLFIR, bit(52)) ? chnl1_omi_th_1; /** USTLFIR[53] * Chan A FIFO overflow error */ - (rUSTLFIR, bit(53)) ? defaultMaskedError; + (rUSTLFIR, bit(53)) ? chnl0_omi_th_1; /** USTLFIR[54] * Chan B FIFO overflow error */ - (rUSTLFIR, bit(54)) ? defaultMaskedError; + (rUSTLFIR, bit(54)) ? chnl1_omi_th_1; /** USTLFIR[55] * Chan A invalid cmd error */ - (rUSTLFIR, bit(55)) ? defaultMaskedError; + (rUSTLFIR, bit(55)) ? chnl0_ocmb_th_1; /** USTLFIR[56] * Chan B invalid cmd error */ - (rUSTLFIR, bit(56)) ? defaultMaskedError; + (rUSTLFIR, bit(56)) ? chnl1_ocmb_th_1; /** USTLFIR[57] * Fatal reg parity error */ - (rUSTLFIR, bit(57)) ? defaultMaskedError; + (rUSTLFIR, bit(57)) ? self_th_1; /** USTLFIR[58] * Recoverable reg parity error */ - (rUSTLFIR, bit(58)) ? defaultMaskedError; + (rUSTLFIR, bit(58)) ? self_th_1; /** USTLFIR[59] * Chan A invalid DL DP combo */ - (rUSTLFIR, bit(59)) ? defaultMaskedError; + (rUSTLFIR, bit(59)) ? chnl0_ocmb_th_1; /** USTLFIR[60] * Chan B invalid DL DP combo */ - (rUSTLFIR, bit(60)) ? defaultMaskedError; + (rUSTLFIR, bit(60)) ? 
chnl1_ocmb_th_1; /** USTLFIR[61] * spare diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule b/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule index 38edbaaea..e34035165 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -24,9 +24,163 @@ # IBM_PROLOG_END_TAG ################################################################################ +# Callouts +################################################################################ + +actionclass chnl0_omi +{ + callout(connected(TYPE_OMI,0), MRU_MED); +}; + +actionclass chnl1_omi +{ + callout(connected(TYPE_OMI,1), MRU_MED); +}; + +actionclass chnl0_omi_L +{ + callout(connected(TYPE_OMI,0), MRU_LOW); +}; + +actionclass chnl1_omi_L +{ + callout(connected(TYPE_OMI,1), MRU_LOW); +}; + +actionclass chnl0_ocmb +{ + callout(connected(TYPE_OCMB_CHIP,0), MRU_MED); +}; + +actionclass chnl1_ocmb +{ + callout(connected(TYPE_OCMB_CHIP,1), MRU_MED); +}; + +actionclass chnl0_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_0"); +}; + +actionclass chnl1_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_1"); +}; + +actionclass chnl0_omi_bus_th_1 +{ + chnl0_omi_bus; + threshold1; +}; + +actionclass chnl1_omi_bus_th_1 +{ + chnl1_omi_bus; + threshold1; +}; + +actionclass chnl0_omi_bus_th_32_perDay +{ + chnl0_omi_bus; + threshold32pday; +}; + +actionclass chnl1_omi_bus_th_32_perDay +{ + chnl1_omi_bus; + threshold32pday; +}; + +actionclass chnl0_omi_th_1 +{ + chnl0_omi; + threshold1; +}; + +actionclass chnl1_omi_th_1 +{ + chnl1_omi; + threshold1; +}; + +actionclass chnl0_ocmb_th_1 +{ + chnl0_ocmb; + threshold1; +}; + +actionclass chnl1_ocmb_th_1 +{ + chnl1_ocmb; + threshold1; +}; + +actionclass 
all_ocmb_and_mcc_th_1 +{ + chnl0_ocmb; + chnl1_ocmb; + calloutSelfMed; + threshold1; +}; + +actionclass chnl0_ocmb_H_omi_L_th_1 +{ + chnl0_ocmb; + chnl0_omi_L; + threshold1; +}; + +actionclass chnl1_ocmb_H_omi_L_th_1 +{ + chnl1_ocmb; + chnl1_omi_L; + threshold1; +}; + +actionclass threshold_and_mask_chnl0_ocmb_th_1 +{ + threshold_and_mask; + chnl0_ocmb; + threshold1; +}; + +actionclass threshold_and_mask_chnl1_ocmb_th_1 +{ + threshold_and_mask; + chnl1_ocmb; + threshold1; +}; + +################################################################################ # Analyze groups ################################################################################ actionclass analyzeDSTLFIR { analyze(gDSTLFIR); }; actionclass analyzeUSTLFIR { analyze(gUSTLFIR); }; +################################################################################ +# Analyze connected +################################################################################ + +actionclass analyze_ocmb_chnl0 +{ + try( funccall("checkOcmb_0"), analyze(connected(TYPE_OCMB_CHIP, 0)) ); +}; + +actionclass analyze_ocmb_chnl1 +{ + try( funccall("checkOcmb_1"), analyze(connected(TYPE_OCMB_CHIP, 1)) ); +}; + +actionclass analyze_ocmb_chnl0_UERE +{ + SueSource; + analyze_ocmb_chnl0; +}; + +actionclass analyze_ocmb_chnl1_UERE +{ + SueSource; + analyze_ocmb_chnl1; +}; + diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule new file mode 100644 index 000000000..001a54e5c --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule @@ -0,0 +1,80 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. 
+# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# IBM_PROLOG_END_TAG + + +############################################################################### +# Additional registers for mcc, not defined in XML +############################################################################### + + ########################################################################### + # P9 Axone target Channel Fail Config registers + ########################################################################### + + register DSTLCFG2 + { + name "P9 Axone DSTL Error Injection Register"; + scomaddr 0x0701090E; + capture group default; + }; + + register USTLFAILMASK + { + name "P9 Axone USTL Fail Response Channel Fail Mask"; + scomaddr 0x07010A13; + capture group default; + }; + + ########################################################################### + # P9 Axone target DSTLFIR + ########################################################################### + + register DSTLFIR_AND + { + name "P9 MCC target DSTLFIR atomic AND"; + scomaddr 0x07010901; + capture group never; + access write_only; + }; + + register DSTLFIR_MASK_OR + { + name "P9 MCC target DSTLFIR MASK atomic OR"; + scomaddr 0x07010905; + capture group never; + access write_only; + }; + + ########################################################################### + # P9 Axone target USTLFIR + ########################################################################### + + register USTLFIR_MASK_OR + { + name "P9 MCC target 
USTLFIR MASK atomic OR"; + scomaddr 0x07010A05; + capture group never; + access write_only; + }; + diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mi.rule b/src/usr/diag/prdf/common/plat/axone/axone_mi.rule index 078163819..56366a7f5 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mi.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mi.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -148,27 +148,27 @@ group gMCFIR /** MCFIR[0] * MC internal recoverable error */ - (rMCFIR, bit(0)) ? defaultMaskedError; + (rMCFIR, bit(0)) ? self_th_1; /** MCFIR[1] * MC internal non recoverable error */ - (rMCFIR, bit(1)) ? defaultMaskedError; + (rMCFIR, bit(1)) ? parent_proc_th_1; /** MCFIR[2] * Powerbus protocol error */ - (rMCFIR, bit(2)) ? defaultMaskedError; + (rMCFIR, bit(2)) ? level2_th_1; /** MCFIR[3] * Inband bar hit with incorrect ttype */ - (rMCFIR, bit(3)) ? defaultMaskedError; + (rMCFIR, bit(3)) ? level2_M_self_L_th_1; /** MCFIR[4] * Multiple bar */ - (rMCFIR, bit(4)) ? defaultMaskedError; + (rMCFIR, bit(4)) ? self_th_1; /** MCFIR[5] * PB write ECC syndrome NE0 @@ -183,7 +183,7 @@ group gMCFIR /** MCFIR[8] * Command list timeout */ - (rMCFIR, bit(8)) ? defaultMaskedError; + (rMCFIR, bit(8)) ? threshold_and_mask_level2; /** MCFIR[9:10] * reserved diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule index 0e47e05a5..d9441d719 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -48,34 +48,66 @@ capture group default; }; - register MCFGP + register MCFGP0 { - name "MCFGP"; - scomaddr 0x501080A; + name "MCFGP0"; + scomaddr 0x0501080A; capture group default; capture group MirrorConfig; }; - register MCFGPA + register MCFGP1 { - name "MCFGPA"; + name "MCFGP1"; scomaddr 0x0501080B; capture group default; capture group MirrorConfig; }; - register MCFGPM + register MCFGP0A { - name "MCFGPM"; - scomaddr 0x501080C; + name "MCFGP0A"; + scomaddr 0x0501080E; capture group default; capture group MirrorConfig; }; - register MCFGPMA + register MCFGP1A { - name "MCFGPMA"; - scomaddr 0x0501080D; + name "MCFGP1A"; + scomaddr 0x0501080F; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM0 + { + name "MCFGPM0"; + scomaddr 0x5010820; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM0A + { + name "MCFGPM0A"; + scomaddr 0x05010821; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM1 + { + name "MCFGPM1"; + scomaddr 0x5010830; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM1A + { + name "MCFGPM1A"; + scomaddr 0x05010831; capture group default; capture group MirrorConfig; }; diff --git a/src/usr/diag/prdf/common/plat/axone/axone_npu.rule b/src/usr/diag/prdf/common/plat/axone/axone_npu.rule index ede5ef5cc..49c71d74a 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_npu.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_npu.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -214,7 +214,7 @@ rule rNPU0FIR group gNPU0FIR filter singlebit, - cs_root_cause(1,2,3,4,5,6,7,9,10,16,18,29,31,42,44) + cs_root_cause(1,2,3,4,5,6,7,9,10,16,18,19,25,29,31,40,42,44,45) { /** NPU0FIR[0] * NTL array CE @@ -354,7 +354,7 @@ group gNPU0FIR /** NPU0FIR[27] * Invalid access to secure memory attempted */ - (rNPU0FIR, bit(27)) ? defaultMaskedError; + (rNPU0FIR, bit(27)) ? self_th_1; /** NPU0FIR[28] * spare @@ -489,12 +489,12 @@ rule rNPU1FIR group gNPU1FIR filter singlebit, - cs_root_cause + cs_root_cause(0,2,4,6,8,10,13,14,15,20,22,25,27,29,31,32,33,34,35,37,39,40,41,42,47,49,51,53,55,57) { /** NPU1FIR[0] * NDL Brick0 stall */ - (rNPU1FIR, bit(0)) ? defaultMaskedError; + (rNPU1FIR, bit(0)) ? self_th_1; /** NPU1FIR[1] * NDL Brick0 nostall @@ -504,7 +504,7 @@ group gNPU1FIR /** NPU1FIR[2] * NDL Brick1 stall */ - (rNPU1FIR, bit(2)) ? defaultMaskedError; + (rNPU1FIR, bit(2)) ? self_th_1; /** NPU1FIR[3] * NDL Brick1 nostall @@ -514,7 +514,7 @@ group gNPU1FIR /** NPU1FIR[4] * NDL Brick2 stall */ - (rNPU1FIR, bit(4)) ? defaultMaskedError; + (rNPU1FIR, bit(4)) ? self_th_1; /** NPU1FIR[5] * NDL Brick2 nostall @@ -524,7 +524,7 @@ group gNPU1FIR /** NPU1FIR[6] * NDL Brick3 stall */ - (rNPU1FIR, bit(6)) ? defaultMaskedError; + (rNPU1FIR, bit(6)) ? self_th_1; /** NPU1FIR[7] * NDL Brick3 nostall @@ -534,7 +534,7 @@ group gNPU1FIR /** NPU1FIR[8] * NDL Brick4 stall */ - (rNPU1FIR, bit(8)) ? defaultMaskedError; + (rNPU1FIR, bit(8)) ? self_th_1; /** NPU1FIR[9] * NDL Brick4 nostall @@ -544,7 +544,7 @@ group gNPU1FIR /** NPU1FIR[10] * NDL Brick5 stall */ - (rNPU1FIR, bit(10)) ? defaultMaskedError; + (rNPU1FIR, bit(10)) ? self_th_1; /** NPU1FIR[11] * NDL Brick5 nostall @@ -554,22 +554,22 @@ group gNPU1FIR /** NPU1FIR[12] * MISC Register ring error (ie noack) */ - (rNPU1FIR, bit(12)) ? defaultMaskedError; + (rNPU1FIR, bit(12)) ? self_th_32perDay; /** NPU1FIR[13] - * MISC Parity error from ibr addr regi + * MISC Parity error on MISC Cntrl reg */ - (rNPU1FIR, bit(13)) ? 
defaultMaskedError; + (rNPU1FIR, bit(13)) ? self_th_1; /** NPU1FIR[14] * MISC Parity error on SCOM D/A addr reg */ - (rNPU1FIR, bit(14)) ? defaultMaskedError; + (rNPU1FIR, bit(14)) ? self_th_1; /** NPU1FIR[15] * MISC Parity error on MISC Cntrl reg */ - (rNPU1FIR, bit(15)) ? defaultMaskedError; + (rNPU1FIR, bit(15)) ? self_th_1; /** NPU1FIR[16] * Reserved @@ -594,7 +594,7 @@ group gNPU1FIR /** NPU1FIR[20] * ATS Effective Address hit multiple TCE */ - (rNPU1FIR, bit(20)) ? defaultMaskedError; + (rNPU1FIR, bit(20)) ? self_th_1; /** NPU1FIR[21] * ATS TCE Page access error @@ -604,72 +604,72 @@ group gNPU1FIR /** NPU1FIR[22] * ATS Timeout on TCE tree walk */ - (rNPU1FIR, bit(22)) ? defaultMaskedError; + (rNPU1FIR, bit(22)) ? self_th_1; /** NPU1FIR[23] * ATS Parity error on TCE cache dir array */ - (rNPU1FIR, bit(23)) ? defaultMaskedError; + (rNPU1FIR, bit(23)) ? self_th_32perDay; /** NPU1FIR[24] * ATS Parity error on TCE cache data array */ - (rNPU1FIR, bit(24)) ? defaultMaskedError; + (rNPU1FIR, bit(24)) ? self_th_32perDay; /** NPU1FIR[25] * ATS ECC UE on Effective Address array */ - (rNPU1FIR, bit(25)) ? defaultMaskedError; + (rNPU1FIR, bit(25)) ? self_th_1; /** NPU1FIR[26] * ATS ECC CE on Effective Address array */ - (rNPU1FIR, bit(26)) ? defaultMaskedError; + (rNPU1FIR, bit(26)) ? self_th_32perDay; /** NPU1FIR[27] * ATS ECC UE on TDRmem array */ - (rNPU1FIR, bit(27)) ? defaultMaskedError; + (rNPU1FIR, bit(27)) ? self_th_1; /** NPU1FIR[28] * ATS ECC CE on TDRmem array */ - (rNPU1FIR, bit(28)) ? defaultMaskedError; + (rNPU1FIR, bit(28)) ? self_th_32perDay; /** NPU1FIR[29] * ATS ECC UE on CQ CTL DMA Read */ - (rNPU1FIR, bit(29)) ? defaultMaskedError; + (rNPU1FIR, bit(29)) ? self_th_1; /** NPU1FIR[30] * ATS ECC CE on CQ CTL DMA Read */ - (rNPU1FIR, bit(30)) ? defaultMaskedError; + (rNPU1FIR, bit(30)) ? self_th_32perDay; /** NPU1FIR[31] * ATS Parity error on TVT entry */ - (rNPU1FIR, bit(31)) ? defaultMaskedError; + (rNPU1FIR, bit(31)) ? 
self_th_1; /** NPU1FIR[32] * ATS Parity err on IODA Address Reg */ - (rNPU1FIR, bit(32)) ? defaultMaskedError; + (rNPU1FIR, bit(32)) ? self_th_1; /** NPU1FIR[33] * ATS Parity error on ATS Control Register */ - (rNPU1FIR, bit(33)) ? defaultMaskedError; + (rNPU1FIR, bit(33)) ? self_th_1; /** NPU1FIR[34] - * ATS Parity error on ATS Timeout Control Register + * ATS Parity error on ATS reg */ - (rNPU1FIR, bit(34)) ? defaultMaskedError; + (rNPU1FIR, bit(34)) ? self_th_1; /** NPU1FIR[35] * ATS Invalid IODA Table Select entry */ - (rNPU1FIR, bit(35)) ? defaultMaskedError; + (rNPU1FIR, bit(35)) ? self_th_1; /** NPU1FIR[36] * Reserved @@ -679,7 +679,7 @@ group gNPU1FIR /** NPU1FIR[37] * Kill xlate epoch timeout */ - (rNPU1FIR, bit(37)) ? defaultMaskedError; + (rNPU1FIR, bit(37)) ? self_th_1; /** NPU1FIR[38] * PEE secure SMF not secure @@ -689,17 +689,32 @@ group gNPU1FIR /** NPU1FIR[39] * XSL in suspend mode when OTL sends cmd */ - (rNPU1FIR, bit(39)) ? defaultMaskedError; + (rNPU1FIR, bit(39)) ? self_th_1; + + /** NPU1FIR[40] + * Unsupported page size + */ + (rNPU1FIR, bit(40)) ? self_th_1; + + /** NPU1FIR[41] + * Unexpected XLATE release + */ + (rNPU1FIR, bit(41)) ? self_th_1; + + /** NPU1FIR[42] + * Kill XLATE done fail + */ + (rNPU1FIR, bit(42)) ? self_th_1; - /** NPU1FIR[40:46] + /** NPU1FIR[43:46] * Reserved */ - (rNPU1FIR, bit(40|41|42|43|44|45|46)) ? defaultMaskedError; + (rNPU1FIR, bit(43|44|45|46)) ? defaultMaskedError; /** NPU1FIR[47] * NDL Brick6 stall */ - (rNPU1FIR, bit(47)) ? defaultMaskedError; + (rNPU1FIR, bit(47)) ? self_th_1; /** NPU1FIR[48] * NDL Brick6 nostall @@ -709,7 +724,7 @@ group gNPU1FIR /** NPU1FIR[49] * NDL Brick7 stall */ - (rNPU1FIR, bit(49)) ? defaultMaskedError; + (rNPU1FIR, bit(49)) ? self_th_1; /** NPU1FIR[50] * NDL Brick7 nostall @@ -719,7 +734,7 @@ group gNPU1FIR /** NPU1FIR[51] * NDL Brick8 stall */ - (rNPU1FIR, bit(51)) ? defaultMaskedError; + (rNPU1FIR, bit(51)) ? 
self_th_1; /** NPU1FIR[52] * NDL Brick8 nostall @@ -729,7 +744,7 @@ group gNPU1FIR /** NPU1FIR[53] * NDL Brick9 stall */ - (rNPU1FIR, bit(53)) ? defaultMaskedError; + (rNPU1FIR, bit(53)) ? self_th_1; /** NPU1FIR[54] * NDL Brick9 nostall @@ -739,7 +754,7 @@ group gNPU1FIR /** NPU1FIR[55] * NDL Brick10 stall */ - (rNPU1FIR, bit(55)) ? defaultMaskedError; + (rNPU1FIR, bit(55)) ? self_th_1; /** NPU1FIR[56] * NDL Brick10 nostall @@ -749,7 +764,7 @@ group gNPU1FIR /** NPU1FIR[57] * NDL Brick11 stall */ - (rNPU1FIR, bit(57)) ? defaultMaskedError; + (rNPU1FIR, bit(57)) ? self_th_1; /** NPU1FIR[58] * NDL Brick11 nostall @@ -762,22 +777,22 @@ group gNPU1FIR (rNPU1FIR, bit(59)) ? defaultMaskedError; /** NPU1FIR[60] - * MISC SCOM ring 0 sat 0 signaled internal FSM err + * Misc SCOM ring 0 sat 0 signalled internal FSM error */ (rNPU1FIR, bit(60)) ? defaultMaskedError; /** NPU1FIR[61] - * MISC SCOM ring 0 sat 1 signaled internal FSM err + * Misc SCOM ring 0 sat 1 signalled internal FSM error */ (rNPU1FIR, bit(61)) ? defaultMaskedError; /** NPU1FIR[62] - * Scom Error + * scom error */ (rNPU1FIR, bit(62)) ? defaultMaskedError; /** NPU1FIR[63] - * Scom Error + * scom error */ (rNPU1FIR, bit(63)) ? defaultMaskedError; @@ -799,7 +814,7 @@ rule rNPU2FIR group gNPU2FIR filter singlebit, - cs_root_cause + cs_root_cause(4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,28,29,30,31,36,37,38,39,40,41,42,43,45,47,48,50,51,52) { /** NPU2FIR[0] * OTL Brick2 translation fault @@ -824,145 +839,145 @@ group gNPU2FIR /** NPU2FIR[4] * OTL TL credit ctr overflow */ - (rNPU2FIR, bit(4)) ? defaultMaskedError; + (rNPU2FIR, bit(4)) ? self_th_1; /** NPU2FIR[5] * OTL RX acTag invalid */ - (rNPU2FIR, bit(5)) ? defaultMaskedError; + (rNPU2FIR, bit(5)) ? self_th_1; /** NPU2FIR[6] * OTL RX acTag points to an invalid entry. */ - (rNPU2FIR, bit(6)) ? defaultMaskedError; + (rNPU2FIR, bit(6)) ? self_th_1; /** NPU2FIR[7] * OTL RX reserved opcode used. */ - (rNPU2FIR, bit(7)) ? 
defaultMaskedError; + (rNPU2FIR, bit(7)) ? self_th_1; /** NPU2FIR[8] * OTL RX rtn_tl_credit cmd outside slot0. */ - (rNPU2FIR, bit(8)) ? defaultMaskedError; + (rNPU2FIR, bit(8)) ? self_th_1; /** NPU2FIR[9] * OTL RX bad opcode and template combo */ - (rNPU2FIR, bit(9)) ? defaultMaskedError; + (rNPU2FIR, bit(9)) ? self_th_1; /** NPU2FIR[10] * OTL RX unsupported template format. */ - (rNPU2FIR, bit(10)) ? defaultMaskedError; + (rNPU2FIR, bit(10)) ? self_th_1; /** NPU2FIR[11] * OTL RX bad template x00 format. */ - (rNPU2FIR, bit(11)) ? defaultMaskedError; + (rNPU2FIR, bit(11)) ? self_th_1; /** NPU2FIR[12] * OTL RX control flit overrun. */ - (rNPU2FIR, bit(12)) ? defaultMaskedError; + (rNPU2FIR, bit(12)) ? self_th_1; /** NPU2FIR[13] * OTL RX unexpected data flit. */ - (rNPU2FIR, bit(13)) ? defaultMaskedError; + (rNPU2FIR, bit(13)) ? self_th_1; /** NPU2FIR[14] * OTL RX DL link down. */ - (rNPU2FIR, bit(14)) ? defaultMaskedError; + (rNPU2FIR, bit(14)) ? self_th_1; /** NPU2FIR[15] * OTL RX bad data received on command. */ - (rNPU2FIR, bit(15)) ? defaultMaskedError; + (rNPU2FIR, bit(15)) ? self_th_1; /** NPU2FIR[16] * OTL RX bad data received on response. */ - (rNPU2FIR, bit(16)) ? defaultMaskedError; + (rNPU2FIR, bit(16)) ? self_th_1; /** NPU2FIR[17] * OTL RX AP response not allowed */ - (rNPU2FIR, bit(17)) ? defaultMaskedError; + (rNPU2FIR, bit(17)) ? self_th_1; /** NPU2FIR[18] * OR of all OTL parity errors. */ - (rNPU2FIR, bit(18)) ? defaultMaskedError; + (rNPU2FIR, bit(18)) ? self_th_1; /** NPU2FIR[19] * OR of all OTL ECC CE errors. */ - (rNPU2FIR, bit(19)) ? defaultMaskedError; + (rNPU2FIR, bit(19)) ? self_th_32perDay; /** NPU2FIR[20] * OR of all OTL ECC UE errors. */ - (rNPU2FIR, bit(20)) ? defaultMaskedError; + (rNPU2FIR, bit(20)) ? self_th_1; /** NPU2FIR[21] * RXO OP Errors. */ - (rNPU2FIR, bit(21)) ? defaultMaskedError; + (rNPU2FIR, bit(21)) ? self_th_1; /** NPU2FIR[22] * RXO Internal Errors. */ - (rNPU2FIR, bit(22)) ? defaultMaskedError; + (rNPU2FIR, bit(22)) ? 
self_th_1; /** NPU2FIR[23] * OTL RXI fifo overrun. */ - (rNPU2FIR, bit(23)) ? defaultMaskedError; + (rNPU2FIR, bit(23)) ? self_th_1; /** NPU2FIR[24] * OTL RXI ctrl flit data run len invalid. */ - (rNPU2FIR, bit(24)) ? defaultMaskedError; + (rNPU2FIR, bit(24)) ? self_th_1; /** NPU2FIR[25] * OTL RXI opcode specifies dL=0b00. */ - (rNPU2FIR, bit(25)) ? defaultMaskedError; + (rNPU2FIR, bit(25)) ? self_th_1; /** NPU2FIR[26] * OTL RXI bad data received vc2 */ - (rNPU2FIR, bit(26)) ? defaultMaskedError; + (rNPU2FIR, bit(26)) ? self_th_1; /** NPU2FIR[27] * OTL RXI dcp2 fifo overrun */ - (rNPU2FIR, bit(27)) ? defaultMaskedError; + (rNPU2FIR, bit(27)) ? self_th_1; /** NPU2FIR[28] * OTL RXI vc1 fifo overrun */ - (rNPU2FIR, bit(28)) ? defaultMaskedError; + (rNPU2FIR, bit(28)) ? self_th_1; /** NPU2FIR[29] * OTL RXI vc2 fifo overrun */ - (rNPU2FIR, bit(29)) ? defaultMaskedError; + (rNPU2FIR, bit(29)) ? self_th_1; /** NPU2FIR[30] - * Reserved + * OTL RXI Data link not supported */ - (rNPU2FIR, bit(30)) ? defaultMaskedError; + (rNPU2FIR, bit(30)) ? self_th_1; /** NPU2FIR[31] * OTL TXI opcode error */ - (rNPU2FIR, bit(31)) ? defaultMaskedError; + (rNPU2FIR, bit(31)) ? self_th_1; /** NPU2FIR[32] - * Malformed packet error type 4 + * OTL RXI reserved field not equal to 0 */ (rNPU2FIR, bit(32)) ? defaultMaskedError; @@ -974,42 +989,42 @@ group gNPU2FIR /** NPU2FIR[36] * MMIO invalidate while one in progress. */ - (rNPU2FIR, bit(36)) ? defaultMaskedError; + (rNPU2FIR, bit(36)) ? self_th_1; /** NPU2FIR[37] * Unexpected ITAG on itag completion pt 0 */ - (rNPU2FIR, bit(37)) ? defaultMaskedError; + (rNPU2FIR, bit(37)) ? self_th_1; /** NPU2FIR[38] * Unexpected ITAG on itag completion pt 1 */ - (rNPU2FIR, bit(38)) ? defaultMaskedError; + (rNPU2FIR, bit(38)) ? self_th_1; /** NPU2FIR[39] * Unexpected Read PEE completion. */ - (rNPU2FIR, bit(39)) ? defaultMaskedError; + (rNPU2FIR, bit(39)) ? self_th_1; /** NPU2FIR[40] * Unexpected Checkout response. */ - (rNPU2FIR, bit(40)) ? 
defaultMaskedError; + (rNPU2FIR, bit(40)) ? self_th_1; /** NPU2FIR[41] * Translation request but SPAP is invalid. */ - (rNPU2FIR, bit(41)) ? defaultMaskedError; + (rNPU2FIR, bit(41)) ? self_th_1; /** NPU2FIR[42] * Read a PEE which was not valid. */ - (rNPU2FIR, bit(42)) ? defaultMaskedError; + (rNPU2FIR, bit(42)) ? self_th_1; /** NPU2FIR[43] * Bloom filter protection error. */ - (rNPU2FIR, bit(43)) ? defaultMaskedError; + (rNPU2FIR, bit(43)) ? self_th_1; /** NPU2FIR[44] * Translation request to non-valid TA @@ -1017,44 +1032,44 @@ group gNPU2FIR (rNPU2FIR, bit(44)) ? defaultMaskedError; /** NPU2FIR[45] - * TA Translation request to an invalid TA + * TA translation request to an invalid TA */ - (rNPU2FIR, bit(45)) ? defaultMaskedError; + (rNPU2FIR, bit(45)) ? self_th_1; /** NPU2FIR[46] * correctable array error (SBE). */ - (rNPU2FIR, bit(46)) ? defaultMaskedError; + (rNPU2FIR, bit(46)) ? self_th_32perDay; /** NPU2FIR[47] * array error (UE or parity). */ - (rNPU2FIR, bit(47)) ? defaultMaskedError; + (rNPU2FIR, bit(47)) ? self_th_1; /** NPU2FIR[48] * S/TLBI buffer overflow. */ - (rNPU2FIR, bit(48)) ? defaultMaskedError; + (rNPU2FIR, bit(48)) ? self_th_1; /** NPU2FIR[49] * SBE CE on Pb cout rsp or PEE read data. */ - (rNPU2FIR, bit(49)) ? defaultMaskedError; + (rNPU2FIR, bit(49)) ? self_th_32perDay; /** NPU2FIR[50] * UE on Pb cut rsp or PEE read data. */ - (rNPU2FIR, bit(50)) ? defaultMaskedError; + (rNPU2FIR, bit(50)) ? self_th_1; /** NPU2FIR[51] * SUE on Pb chkout rsp or Pb PEE rd data. */ - (rNPU2FIR, bit(51)) ? defaultMaskedError; + (rNPU2FIR, bit(51)) ? self_th_1; /** NPU2FIR[52] - * PA mem_hit when bar mode is nonzero + * PA mem hit when bar mode is nonzero */ - (rNPU2FIR, bit(52)) ? defaultMaskedError; + (rNPU2FIR, bit(52)) ? self_th_1; /** NPU2FIR[53] * XSL Reserved, macro bit 17. 
diff --git a/src/usr/diag/prdf/common/plat/axone/axone_obus.rule b/src/usr/diag/prdf/common/plat/axone/axone_obus.rule index a079fac59..1a346c417 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_obus.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_obus.rule @@ -469,12 +469,12 @@ group gIOOLFIR /** IOOLFIR[8] * link0 nak received */ - (rIOOLFIR, bit(8)) ? defaultMaskedError; + (rIOOLFIR, bit(8)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[9] * link1 nak received */ - (rIOOLFIR, bit(9)) ? defaultMaskedError; + (rIOOLFIR, bit(9)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[10] * link0 replay buffer full @@ -499,22 +499,22 @@ group gIOOLFIR /** IOOLFIR[14] * link0 sl ecc correctable */ - (rIOOLFIR, bit(14)) ? threshold_and_mask_self; + (rIOOLFIR, bit(14)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[15] * link1 sl ecc correctable */ - (rIOOLFIR, bit(15)) ? threshold_and_mask_self; + (rIOOLFIR, bit(15)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[16] * link0 sl ecc ue */ - (rIOOLFIR, bit(16)) ? threshold_and_mask_self; + (rIOOLFIR, bit(16)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[17] * link1 sl ecc ue */ - (rIOOLFIR, bit(17)) ? threshold_and_mask_self; + (rIOOLFIR, bit(17)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[18] * link0 retrain threshold @@ -597,12 +597,12 @@ group gIOOLFIR (rIOOLFIR, bit(33)) ? defaultMaskedError; /** IOOLFIR[34] - * link0 num replay + * link0 num replay or no forward progress */ (rIOOLFIR, bit(34)) ? defaultMaskedError; /** IOOLFIR[35] - * link1 num replay + * link1 num replay or no forward progress */ (rIOOLFIR, bit(35)) ? defaultMaskedError; @@ -619,12 +619,12 @@ group gIOOLFIR /** IOOLFIR[38] * link0 prbs select error */ - (rIOOLFIR, bit(38)) ? threshold_and_mask_self; + (rIOOLFIR, bit(38)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[39] * link1 prbs select error */ - (rIOOLFIR, bit(39)) ? threshold_and_mask_self; + (rIOOLFIR, bit(39)) ? 
threshold_and_mask_self_smp_only; /** IOOLFIR[40] * link0 tcomplete bad @@ -639,102 +639,102 @@ group gIOOLFIR /** IOOLFIR[42] * link0 no spare lane available */ - (rIOOLFIR, bit(42)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(42)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[43] * link1 no spare lane available */ - (rIOOLFIR, bit(43)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(43)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[44] - * link0 spare done + * link0 spare done or degraded mode */ - (rIOOLFIR, bit(44)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(44)) ? spare_lane_degraded_mode_L0; /** IOOLFIR[45] - * link1 spare done + * link1 spare done or degraded mode */ - (rIOOLFIR, bit(45)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(45)) ? spare_lane_degraded_mode_L1; /** IOOLFIR[46] * link0 too many crc errors */ - (rIOOLFIR, bit(46)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(46)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[47] * link1 too many crc errors */ - (rIOOLFIR, bit(47)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(47)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[48] - * link0 npu error + * link0 npu error or orx otx dlx errors */ (rIOOLFIR, bit(48)) ? threshold_and_mask_self; /** IOOLFIR[49] - * link1 npu error + * link1 npu error or orx otx dlx errors */ (rIOOLFIR, bit(49)) ? threshold_and_mask_self; /** IOOLFIR[50] * linkx npu error */ - (rIOOLFIR, bit(50)) ? threshold_and_mask_self; + (rIOOLFIR, bit(50)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[51] * osc switch */ - (rIOOLFIR, bit(51)) ? threshold_and_mask_self; + (rIOOLFIR, bit(51)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[52] * link0 correctable array error */ - (rIOOLFIR, bit(52)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(52)) ? self_th_32perDay; /** IOOLFIR[53] * link1 correctable array error */ - (rIOOLFIR, bit(53)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(53)) ? self_th_32perDay; /** IOOLFIR[54] * link0 uncorrectable array error */ - (rIOOLFIR, bit(54)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(54)) ? 
self_th_1; /** IOOLFIR[55] * link1 uncorrectable array error */ - (rIOOLFIR, bit(55)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(55)) ? self_th_1; /** IOOLFIR[56] * link0 training failed */ - (rIOOLFIR, bit(56)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(56)) ? training_failure_L0; /** IOOLFIR[57] * link1 training failed */ - (rIOOLFIR, bit(57)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(57)) ? training_failure_L1; /** IOOLFIR[58] * link0 unrecoverable error */ - (rIOOLFIR, bit(58)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(58)) ? unrecoverable_error_L0; /** IOOLFIR[59] * link1 unrecoverable error */ - (rIOOLFIR, bit(59)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(59)) ? unrecoverable_error_L1; /** IOOLFIR[60] * link0 internal error */ - (rIOOLFIR, bit(60)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(60)) ? internal_error_L0; /** IOOLFIR[61] * link1 internal error */ - (rIOOLFIR, bit(61)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(61)) ? internal_error_L1; /** IOOLFIR[62] * fir scom err dup diff --git a/src/usr/diag/prdf/common/plat/axone/axone_omic.rule b/src/usr/diag/prdf/common/plat/axone/axone_omic.rule index 09ed59f2d..7b26f7a3a 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_omic.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_omic.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2020 # [+] International Business Machines Corp. # # @@ -196,8 +196,10 @@ rule rOMIC }; group gOMIC attntype CHECK_STOP, RECOVERABLE, UNIT_CS, HOST_ATTN - filter singlebit + filter priority(2,0,1) { + # We need to prioritize analysis to the OMIDLFIR here because of potential + # Channel Fail attentions in that FIR that will be reported as RECOVERABLE. (rOMIC, bit(0)) ? analyzeIOOMIFIR; (rOMIC, bit(1)) ? analyzeMCPPEFIR; (rOMIC, bit(2)) ? analyzeOMIDLFIR; @@ -226,17 +228,17 @@ group gIOOMIFIR /** IOOMIFIR[0] * RX invalid state or parity error */ - (rIOOMIFIR, bit(0)) ? defaultMaskedError; + (rIOOMIFIR, bit(0)) ? 
self_th_1; /** IOOMIFIR[1] * TX invalid state or parity error */ - (rIOOMIFIR, bit(1)) ? defaultMaskedError; + (rIOOMIFIR, bit(1)) ? self_th_1; /** IOOMIFIR[2] * GCR hang error */ - (rIOOMIFIR, bit(2)) ? defaultMaskedError; + (rIOOMIFIR, bit(2)) ? self_th_1; /** IOOMIFIR[3:47] * Unused @@ -359,306 +361,306 @@ rule rOMIDLFIR }; group gOMIDLFIR - filter singlebit, - cs_root_cause + filter priority(0,20,40), + cs_root_cause(0,20,40) { /** OMIDLFIR[0] - * DL0 fatal error + * OMI-DL0 fatal error */ - (rOMIDLFIR, bit(0)) ? defaultMaskedError; + (rOMIDLFIR, bit(0)) ? dl0_fatal_error; /** OMIDLFIR[1] - * DL0 data UE + * OMI-DL0 UE on data flit */ - (rOMIDLFIR, bit(1)) ? defaultMaskedError; + (rOMIDLFIR, bit(1)) ? dl0_omi_th_1; /** OMIDLFIR[2] - * DL0 flit CE + * OMI-DL0 CE on TL flit */ - (rOMIDLFIR, bit(2)) ? defaultMaskedError; + (rOMIDLFIR, bit(2)) ? dl0_omi_th_32perDay; /** OMIDLFIR[3] - * DL0 CRC error + * OMI-DL0 detected a CRC error */ (rOMIDLFIR, bit(3)) ? defaultMaskedError; /** OMIDLFIR[4] - * DL0 nack + * OMI-DL0 received a nack */ (rOMIDLFIR, bit(4)) ? defaultMaskedError; /** OMIDLFIR[5] - * DL0 X4 mode + * OMI-DL0 running in degraded mode */ - (rOMIDLFIR, bit(5)) ? defaultMaskedError; + (rOMIDLFIR, bit(5)) ? dl0_omi_bus_th_1; /** OMIDLFIR[6] - * DL0 EDPL + * OMI-DL0 parity error detection on a lane */ (rOMIDLFIR, bit(6)) ? defaultMaskedError; /** OMIDLFIR[7] - * DL0 timeout + * OMI-DL0 retrained due to no forward progress */ - (rOMIDLFIR, bit(7)) ? defaultMaskedError; + (rOMIDLFIR, bit(7)) ? dl0_omi_bus_th_32perDay; /** OMIDLFIR[8] - * DL0 remote retrain + * OMI-DL0 remote side initiated a retrain */ (rOMIDLFIR, bit(8)) ? defaultMaskedError; /** OMIDLFIR[9] - * DL0 error retrain + * OMI-DL0 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(9)) ? defaultMaskedError; + (rOMIDLFIR, bit(9)) ? dl0_omi_bus_th_32perDay; /** OMIDLFIR[10] - * DL0 EDPL retrain + * OMI-DL0 threshold reached */ - (rOMIDLFIR, bit(10)) ? 
defaultMaskedError; + (rOMIDLFIR, bit(10)) ? dl0_omi_bus_th_32perDay; /** OMIDLFIR[11] - * DL0 trained + * OMI-DL0 trained */ (rOMIDLFIR, bit(11)) ? defaultMaskedError; /** OMIDLFIR[12] - * DL0 endpoint bit 0 + * OMI-DL0 endpoint error bit 0 */ (rOMIDLFIR, bit(12)) ? defaultMaskedError; /** OMIDLFIR[13] - * DL0 endpoint bit 1 + * OMI-DL0 endpoint error bit 1 */ (rOMIDLFIR, bit(13)) ? defaultMaskedError; /** OMIDLFIR[14] - * DL0 endpoint bit 2 + * OMI-DL0 endpoint error bit 2 */ (rOMIDLFIR, bit(14)) ? defaultMaskedError; /** OMIDLFIR[15] - * DL0 endpoint bit 3 + * OMI-DL0 endpoint error bit 3 */ (rOMIDLFIR, bit(15)) ? defaultMaskedError; /** OMIDLFIR[16] - * DL0 endpoint bit 4 + * OMI-DL0 endpoint error bit 4 */ (rOMIDLFIR, bit(16)) ? defaultMaskedError; /** OMIDLFIR[17] - * DL0 endpoint bit 5 + * OMI-DL0 endpoint error bit 5 */ (rOMIDLFIR, bit(17)) ? defaultMaskedError; /** OMIDLFIR[18] - * DL0 endpoint bit 6 + * OMI-DL0 endpoint error bit 6 */ (rOMIDLFIR, bit(18)) ? defaultMaskedError; /** OMIDLFIR[19] - * DL0 endpoint bit 7 + * OMI-DL0 endpoint error bit 7 */ (rOMIDLFIR, bit(19)) ? defaultMaskedError; /** OMIDLFIR[20] - * DL1 fatal error + * OMI-DL1 fatal error */ - (rOMIDLFIR, bit(20)) ? defaultMaskedError; + (rOMIDLFIR, bit(20)) ? dl1_fatal_error; /** OMIDLFIR[21] - * DL1 data UE + * OMI-DL1 UE on data flit */ - (rOMIDLFIR, bit(21)) ? defaultMaskedError; + (rOMIDLFIR, bit(21)) ? dl1_omi_th_1; /** OMIDLFIR[22] - * DL1 flit CE + * OMI-DL1 CE on TL flit */ - (rOMIDLFIR, bit(22)) ? defaultMaskedError; + (rOMIDLFIR, bit(22)) ? dl1_omi_th_32perDay; /** OMIDLFIR[23] - * DL1 CRC error + * OMI-DL1 detected a CRC error */ (rOMIDLFIR, bit(23)) ? defaultMaskedError; /** OMIDLFIR[24] - * DL1 nack + * OMI-DL1 received a nack */ (rOMIDLFIR, bit(24)) ? defaultMaskedError; /** OMIDLFIR[25] - * DL1 X4 mode + * OMI-DL1 running in degraded mode */ - (rOMIDLFIR, bit(25)) ? defaultMaskedError; + (rOMIDLFIR, bit(25)) ? 
dl1_omi_bus_th_1; /** OMIDLFIR[26] - * DL1 EDPL + * OMI-DL1 parity error detection on a lane */ (rOMIDLFIR, bit(26)) ? defaultMaskedError; /** OMIDLFIR[27] - * DL1 timeout + * OMI-DL1 retrained due to no forward progress */ - (rOMIDLFIR, bit(27)) ? defaultMaskedError; + (rOMIDLFIR, bit(27)) ? dl1_omi_bus_th_32perDay; /** OMIDLFIR[28] - * DL1 remote retrain + * OMI-DL1 remote side initiated a retrain */ (rOMIDLFIR, bit(28)) ? defaultMaskedError; /** OMIDLFIR[29] - * DL1 error retrain + * OMI-DL1 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(29)) ? defaultMaskedError; + (rOMIDLFIR, bit(29)) ? dl1_omi_bus_th_32perDay; /** OMIDLFIR[30] - * DL1 EDPL retrain + * OMI-DL1 threshold reached */ - (rOMIDLFIR, bit(30)) ? defaultMaskedError; + (rOMIDLFIR, bit(30)) ? dl1_omi_bus_th_32perDay; /** OMIDLFIR[31] - * DL1 trained + * OMI-DL1 trained */ (rOMIDLFIR, bit(31)) ? defaultMaskedError; /** OMIDLFIR[32] - * DL1 endpoint bit 0 + * OMI-DL1 endpoint error bit 0 */ (rOMIDLFIR, bit(32)) ? defaultMaskedError; /** OMIDLFIR[33] - * DL1 endpoint bit 1 + * OMI-DL1 endpoint error bit 1 */ (rOMIDLFIR, bit(33)) ? defaultMaskedError; /** OMIDLFIR[34] - * DL1 endpoint bit 2 + * OMI-DL1 endpoint error bit 2 */ (rOMIDLFIR, bit(34)) ? defaultMaskedError; /** OMIDLFIR[35] - * DL1 endpoint bit 3 + * OMI-DL1 endpoint error bit 3 */ (rOMIDLFIR, bit(35)) ? defaultMaskedError; /** OMIDLFIR[36] - * DL1 endpoint bit 4 + * OMI-DL1 endpoint error bit 4 */ (rOMIDLFIR, bit(36)) ? defaultMaskedError; /** OMIDLFIR[37] - * DL1 endpoint bit 5 + * OMI-DL1 endpoint error bit 5 */ (rOMIDLFIR, bit(37)) ? defaultMaskedError; /** OMIDLFIR[38] - * DL1 endpoint bit 6 + * OMI-DL1 endpoint error bit 6 */ (rOMIDLFIR, bit(38)) ? defaultMaskedError; /** OMIDLFIR[39] - * DL1 endpoint bit 7 + * OMI-DL1 endpoint error bit 7 */ (rOMIDLFIR, bit(39)) ? defaultMaskedError; /** OMIDLFIR[40] - * DL2 fatal error + * OMI-DL2 fatal error */ - (rOMIDLFIR, bit(40)) ? 
defaultMaskedError; + (rOMIDLFIR, bit(40)) ? dl2_fatal_error; /** OMIDLFIR[41] - * DL2 data UE + * OMI-DL2 UE on data flit */ - (rOMIDLFIR, bit(41)) ? defaultMaskedError; + (rOMIDLFIR, bit(41)) ? dl2_omi_th_1; /** OMIDLFIR[42] - * DL2 flit CE + * OMI-DL2 CE on TL flit */ - (rOMIDLFIR, bit(42)) ? defaultMaskedError; + (rOMIDLFIR, bit(42)) ? dl2_omi_th_32perDay; /** OMIDLFIR[43] - * DL2 CRC error + * OMI-DL2 detected a CRC error */ (rOMIDLFIR, bit(43)) ? defaultMaskedError; /** OMIDLFIR[44] - * DL2 nack + * OMI-DL2 received a nack */ (rOMIDLFIR, bit(44)) ? defaultMaskedError; /** OMIDLFIR[45] - * DL2 X4 mode + * OMI-DL2 running in degraded mode */ - (rOMIDLFIR, bit(45)) ? defaultMaskedError; + (rOMIDLFIR, bit(45)) ? dl2_omi_bus_th_1; /** OMIDLFIR[46] - * DL2 EDPL + * OMI-DL2 parity error detection on a lane */ (rOMIDLFIR, bit(46)) ? defaultMaskedError; /** OMIDLFIR[47] - * DL2 timeout + * OMI-DL2 retrained due to no forward progress */ - (rOMIDLFIR, bit(47)) ? defaultMaskedError; + (rOMIDLFIR, bit(47)) ? dl2_omi_bus_th_32perDay; /** OMIDLFIR[48] - * DL2 remote retrain + * OMI-DL2 remote side initiated a retrain */ (rOMIDLFIR, bit(48)) ? defaultMaskedError; /** OMIDLFIR[49] - * DL2 error retrain + * OMI-DL2 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(49)) ? defaultMaskedError; + (rOMIDLFIR, bit(49)) ? dl2_omi_bus_th_32perDay; /** OMIDLFIR[50] - * DL2 EDPL retrain + * OMI-DL2 threshold reached */ - (rOMIDLFIR, bit(50)) ? defaultMaskedError; + (rOMIDLFIR, bit(50)) ? dl2_omi_bus_th_32perDay; /** OMIDLFIR[51] - * DL2 trained + * OMI-DL2 trained */ (rOMIDLFIR, bit(51)) ? defaultMaskedError; /** OMIDLFIR[52] - * DL2 endpoint bit 0 + * OMI-DL2 endpoint error bit 0 */ (rOMIDLFIR, bit(52)) ? defaultMaskedError; /** OMIDLFIR[53] - * DL2 endpoint bit 1 + * OMI-DL2 endpoint error bit 1 */ (rOMIDLFIR, bit(53)) ? defaultMaskedError; /** OMIDLFIR[54] - * DL2 endpoint bit 2 + * OMI-DL2 endpoint error bit 2 */ (rOMIDLFIR, bit(54)) ? 
defaultMaskedError; /** OMIDLFIR[55] - * DL2 endpoint bit 3 + * OMI-DL2 endpoint error bit 3 */ (rOMIDLFIR, bit(55)) ? defaultMaskedError; /** OMIDLFIR[56] - * DL2 endpoint bit 4 + * OMI-DL2 endpoint error bit 4 */ (rOMIDLFIR, bit(56)) ? defaultMaskedError; /** OMIDLFIR[57] - * DL2 endpoint bit 5 + * OMI-DL2 endpoint error bit 5 */ (rOMIDLFIR, bit(57)) ? defaultMaskedError; /** OMIDLFIR[58] - * DL2 endpoint bit 6 + * OMI-DL2 endpoint error bit 6 */ (rOMIDLFIR, bit(58)) ? defaultMaskedError; /** OMIDLFIR[59] - * DL2 endpoint bit 7 + * OMI-DL2 endpoint error bit 7 */ (rOMIDLFIR, bit(59)) ? defaultMaskedError; @@ -667,6 +669,21 @@ group gOMIDLFIR */ (rOMIDLFIR, bit(60)) ? defaultMaskedError; + /** OMIDLFIR[61] + * reserved + */ + (rOMIDLFIR, bit(61)) ? defaultMaskedError; + + /** OMIDLFIR[62] + * LFIR internal parity error + */ + (rOMIDLFIR, bit(62)) ? defaultMaskedError; + + /** OMIDLFIR[63] + * SCOM Satellite Error + */ + (rOMIDLFIR, bit(63)) ? defaultMaskedError; + }; ############################################################################## diff --git a/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule b/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule index ecb6626a8..dbf563b47 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -24,6 +24,133 @@ # IBM_PROLOG_END_TAG ################################################################################ +# OMIC Actions # +################################################################################ + +actionclass dl0_omi +{ + callout(connected(TYPE_OMI,0), MRU_MED); +}; + +actionclass dl1_omi +{ + callout(connected(TYPE_OMI,1), MRU_MED); +}; + +actionclass dl2_omi +{ + callout(connected(TYPE_OMI,2), MRU_MED); +}; + +actionclass dl0_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_0"); +}; + +actionclass dl1_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_1"); +}; + +actionclass dl2_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_2"); +}; + +/** OMI-DL0 Fatal Error */ +actionclass dl0_fatal_error +{ + try( funccall("DlFatalError_0"), dl0_omi_bus ); + threshold1; +}; + +/** OMI-DL1 Fatal Error */ +actionclass dl1_fatal_error +{ + try( funccall("DlFatalError_1"), dl1_omi_bus ); + threshold1; +}; + +/** OMI-DL2 Fatal Error */ +actionclass dl2_fatal_error +{ + try( funccall("DlFatalError_2"), dl2_omi_bus ); + threshold1; +}; + +actionclass dl0_omi_th_1 +{ + dl0_omi; + threshold1; +}; + +actionclass dl1_omi_th_1 +{ + dl1_omi; + threshold1; +}; + +actionclass dl2_omi_th_1 +{ + dl2_omi; + threshold1; +}; + +actionclass dl0_omi_th_32perDay +{ + dl0_omi; + threshold32pday; +}; + +actionclass dl1_omi_th_32perDay +{ + dl1_omi; + threshold32pday; +}; + +actionclass dl2_omi_th_32perDay +{ + dl2_omi; + threshold32pday; +}; + +actionclass dl0_omi_bus_th_1 +{ + dl0_omi_bus; + threshold1; +}; + +actionclass dl1_omi_bus_th_1 +{ + dl1_omi_bus; + threshold1; +}; + +actionclass dl2_omi_bus_th_1 +{ + dl2_omi_bus; + threshold1; +}; + +actionclass dl0_omi_bus_th_32perDay +{ + dl0_omi_bus; + threshold32pday; +}; + +actionclass dl1_omi_bus_th_32perDay +{ + dl1_omi_bus; + threshold32pday; +}; + +actionclass dl2_omi_bus_th_32perDay +{ + dl2_omi_bus; + threshold32pday; +}; + 
+################################################################################ # Analyze groups ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule new file mode 100644 index 000000000..e698652a6 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule @@ -0,0 +1,62 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# IBM_PROLOG_END_TAG + +############################################################################### +# Additional registers for omic, not defined in XML +############################################################################### + + + ########################################################################### + # P9 Axone target OMIDLFIR + ########################################################################### + + register OMIDLFIR_MASK_OR + { + name "P9 OMIC target OMIDLFIR MASK atomic OR"; + scomaddr 0x07013345; + capture group never; + access write_only; + }; + + register DL0_ERROR_HOLD + { + name "P9 Axone target DL0 Error Hold Register"; + scomaddr 0x07013353; + capture group default; + }; + + register DL1_ERROR_HOLD + { + name "P9 Axone target DL1 Error Hold Register"; + scomaddr 0x07013363; + capture group default; + }; + + register DL2_ERROR_HOLD + { + name "P9 Axone target DL2 Error Hold Register"; + scomaddr 0x07013373; + capture group default; + }; diff --git a/src/usr/diag/prdf/common/plat/axone/axone_phb.rule b/src/usr/diag/prdf/common/plat/axone/axone_phb.rule index 844739ee2..1c5bb566d 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_phb.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_phb.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -212,7 +212,7 @@ group gPHBNFIR /** PHBNFIR[0] * BAR Parity Error */ - (rPHBNFIR, bit(0)) ? self_th_1; + (rPHBNFIR, bit(0)) ? parent_proc_th_1; /** PHBNFIR[1] * Parity Errors on Registers besides BAR @@ -252,12 +252,12 @@ group gPHBNFIR /** PHBNFIR[8] * Register Array Parity Error */ - (rPHBNFIR, bit(8)) ? self_th_1; + (rPHBNFIR, bit(8)) ? parent_proc_th_1; /** PHBNFIR[9] * Power Bus Interface Parity Error */ - (rPHBNFIR, bit(9)) ? self_th_1; + (rPHBNFIR, bit(9)) ? 
parent_proc_th_1; /** PHBNFIR[10] * Power Bus Data Hang @@ -297,7 +297,7 @@ group gPHBNFIR /** PHBNFIR[17] * Hardware Error */ - (rPHBNFIR, bit(17)) ? self_th_1; + (rPHBNFIR, bit(17)) ? parent_proc_th_1; /** PHBNFIR[18] * Unsolicited Power Bus Data diff --git a/src/usr/diag/prdf/common/plat/axone/axone_proc.rule b/src/usr/diag/prdf/common/plat/axone/axone_proc.rule index c37c103be..b936106e2 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_proc.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_proc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -950,42 +950,6 @@ chip axone_proc }; ############################################################################ - # P9 chip ENHCAFIR - ############################################################################ - - register ENHCAFIR - { - name "P9 chip ENHCAFIR"; - scomaddr 0x05012940; - reset (&, 0x05012941); - mask (|, 0x05012945); - capture group default; - }; - - register ENHCAFIR_MASK - { - name "P9 chip ENHCAFIR MASK"; - scomaddr 0x05012943; - capture group default; - }; - - register ENHCAFIR_ACT0 - { - name "P9 chip ENHCAFIR ACT0"; - scomaddr 0x05012946; - capture group default; - capture req nonzero("ENHCAFIR"); - }; - - register ENHCAFIR_ACT1 - { - name "P9 chip ENHCAFIR ACT1"; - scomaddr 0x05012947; - capture group default; - capture req nonzero("ENHCAFIR"); - }; - - ############################################################################ # P9 chip PBAMFIR ############################################################################ @@ -2758,7 +2722,7 @@ group gNXCQFIR /** NXCQFIR[19] * Uncorrectable error on ERAT arrays */ - (rNXCQFIR, bit(19)) ? nx_th_32perDay; + (rNXCQFIR, bit(19)) ? nx_th_1; /** NXCQFIR[20] * SUE on ERAT arrays @@ -4077,14 +4041,14 @@ group gN3_CHIPLET_FIR (rN3_CHIPLET_FIR, bit(14)) ? 
analyzePBPPEFIR; /** N3_CHIPLET_FIR[15] - * Attention from PBIOEFIR + * Attention from PBIOOFIR */ - (rN3_CHIPLET_FIR, bit(15)) ? analyzePBIOEFIR; + (rN3_CHIPLET_FIR, bit(15)) ? analyzePBIOOFIR; /** N3_CHIPLET_FIR[16] - * Attention from PBIOOFIR + * Attention from NPU0FIR 1 */ - (rN3_CHIPLET_FIR, bit(16)) ? analyzePBIOOFIR; + (rN3_CHIPLET_FIR, bit(16)) ? analyzeConnectedNPU1; /** N3_CHIPLET_FIR[17] * Attention from INTCQFIR @@ -4106,15 +4070,10 @@ group gN3_CHIPLET_FIR */ (rN3_CHIPLET_FIR, bit(20)) ? analyzePBAMFIR; - /** N3_CHIPLET_FIR[21] - * Attention from NPU0FIR 1 - */ - (rN3_CHIPLET_FIR, bit(21)) ? analyzeConnectedNPU1; - /** N3_CHIPLET_FIR[22] - * Attention from ENHCAFIR + * Attention from PBIOEFIR */ - (rN3_CHIPLET_FIR, bit(22)) ? analyzeENHCAFIR; + (rN3_CHIPLET_FIR, bit(22)) ? analyzePBIOEFIR; /** N3_CHIPLET_FIR[23] * Attention from NPU2FIR 0 @@ -5145,144 +5104,6 @@ group gPSIHBFIR }; ################################################################################ -# P9 chip ENHCAFIR -################################################################################ - -rule rENHCAFIR -{ - CHECK_STOP: - ENHCAFIR & ~ENHCAFIR_MASK & ~ENHCAFIR_ACT0 & ~ENHCAFIR_ACT1; - RECOVERABLE: - ENHCAFIR & ~ENHCAFIR_MASK & ~ENHCAFIR_ACT0 & ENHCAFIR_ACT1; -}; - -group gENHCAFIR - filter singlebit, - cs_root_cause -{ - /** ENHCAFIR[0] - * PB0 data UE - */ - (rENHCAFIR, bit(0)) ? defaultMaskedError; - - /** ENHCAFIR[1] - * PB0 data SUE - */ - (rENHCAFIR, bit(1)) ? defaultMaskedError; - - /** ENHCAFIR[2] - * PB0 data ue - */ - (rENHCAFIR, bit(2)) ? defaultMaskedError; - - /** ENHCAFIR[3] - * spare - */ - (rENHCAFIR, bit(3)) ? defaultMaskedError; - - /** ENHCAFIR[4] - * Castout Drop Counter Full - */ - (rENHCAFIR, bit(4)) ? defaultMaskedError; - - /** ENHCAFIR[5] - * Data Hang Detect - */ - (rENHCAFIR, bit(5)) ? defaultMaskedError; - - /** ENHCAFIR[6] - * Unexpected data or cresp - */ - (rENHCAFIR, bit(6)) ? 
defaultMaskedError; - - /** ENHCAFIR[7] - * Internal Error - */ - (rENHCAFIR, bit(7)) ? defaultMaskedError; - - /** ENHCAFIR[8] - * ADU checkstop error from power bus data - */ - (rENHCAFIR, bit(8)) ? defaultMaskedError; - - /** ENHCAFIR[9] - * ADU checkstop error from alter display - */ - (rENHCAFIR, bit(9)) ? defaultMaskedError; - - /** ENHCAFIR[10] - * ADU checkstop error from xsco m - */ - (rENHCAFIR, bit(10)) ? defaultMaskedError; - - /** ENHCAFIR[11] - * ADU checkstop from power bus cmd - */ - (rENHCAFIR, bit(11)) ? defaultMaskedError; - - /** ENHCAFIR[12] - * ADU checkstop error from power bus send - */ - (rENHCAFIR, bit(12)) ? defaultMaskedError; - - /** ENHCAFIR[13] - * ADU checkstop from power bus receive - */ - (rENHCAFIR, bit(13)) ? defaultMaskedError; - - /** ENHCAFIR[14] - * ADU recoverable error from pb data - */ - (rENHCAFIR, bit(14)) ? defaultMaskedError; - - /** ENHCAFIR[15] - * ADU recoverable error from alter display - */ - (rENHCAFIR, bit(15)) ? defaultMaskedError; - - /** ENHCAFIR[16] - * ADU recoverable error from xscom - */ - (rENHCAFIR, bit(16)) ? defaultMaskedError; - - /** ENHCAFIR[17] - * ADU recoverable from power bus cmd - */ - (rENHCAFIR, bit(17)) ? defaultMaskedError; - - /** ENHCAFIR[18] - * ADU recoverable error from pb send - */ - (rENHCAFIR, bit(18)) ? defaultMaskedError; - - /** ENHCAFIR[19] - * ADU recoverable error from pb receive - */ - (rENHCAFIR, bit(19)) ? defaultMaskedError; - - /** ENHCAFIR[20] - * NHTM scom error - */ - (rENHCAFIR, bit(20)) ? defaultMaskedError; - - /** ENHCAFIR[21] - * spare - */ - (rENHCAFIR, bit(21)) ? defaultMaskedError; - - /** ENHCAFIR[22] - * scom error - */ - (rENHCAFIR, bit(22)) ? defaultMaskedError; - - /** ENHCAFIR[23] - * scom error - */ - (rENHCAFIR, bit(23)) ? 
defaultMaskedError; - -}; - -################################################################################ # P9 chip PBAMFIR ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C b/src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C new file mode 100644 index 000000000..804418717 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C @@ -0,0 +1,142 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfMemUtils.H> +#include <prdfPlatServices.H> +#include <prdfMemExtraSig.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace axone_mcc +{ + +//############################################################################## +// +// Special plugins +// +//############################################################################## + +/** + * @brief Analysis code that is called before the main analyze() function. + * @param i_chip A MCC chip. + * @param io_sc The step code data struct. + * @param o_analyzed True if analysis is done on this chip, false otherwise. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ +int32_t PreAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + bool & o_analyzed ) +{ + // Check for a channel failure before analyzing this chip. + o_analyzed = MemUtils::analyzeChnlFail<TYPE_MCC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_mcc, PreAnalysis ); + +/** + * @brief Plugin function called after analysis is complete but before PRD + * exits. + * @param i_chip A MCC chip. + * @param io_sc The step code data struct. + * @note This is especially useful for any analysis that still needs to be + * done after the framework clears the FIR bits that were at attention. + * @return SUCCESS. + */ +int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + // If there was a channel failure some cleanup is required to ensure + // there are no more attentions from this channel. 
+ MemUtils::cleanupChnlFail<TYPE_MCC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_mcc, PostAnalysis ); + +//############################################################################## +// +// DSTLFIR +// +//############################################################################## + +/** + * @brief Plugin function called to avoid analyzing to a checkstop on an OCMB. + * @param i_chip A MCC chip. + * @param io_sc The step code data struct. + * @param i_pos Position of the OMI/OCMB relative to the MCC. + * @return SUCCESS if the primary attn is CS, else PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t checkOcmb( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + uint8_t i_pos ) +{ + int32_t rc = PRD_SCAN_COMM_REGISTER_ZERO; + + #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS + // We do not have support for the OCMB in the checkstop analysis path. + // As such, we will simply indicate there is an attention from the OCMB and + // add second level support and both sides of the bus as callouts. 
+ if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) + { + TargetHandle_t omi = getConnectedChild( i_chip->getTrgt(), TYPE_OMI, + i_pos ); + ExtensibleChip * ocmb = getConnectedChild( i_chip, TYPE_OCMB_CHIP, + i_pos ); + + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->SetCallout( omi, MRU_LOW, NO_GARD ); + io_sc.service_data->SetCallout( ocmb->getTrgt(), MRU_LOW, NO_GARD ); + + rc = SUCCESS; + } + #endif + + return rc; +} + +#define CHECK_OCMB_PLUGIN( POS ) \ +int32_t checkOcmb_##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + return checkOcmb( i_chip, io_sc, POS ); \ +} \ +PRDF_PLUGIN_DEFINE( axone_mcc, checkOcmb_##POS ); + +CHECK_OCMB_PLUGIN( 0 ); +CHECK_OCMB_PLUGIN( 1 ); + +} // end namespace axone_mcc + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C b/src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C new file mode 100644 index 000000000..f6ea182b9 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C @@ -0,0 +1,173 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019,2020 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. 
See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfMemUtils.H> +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace axone_omic +{ + +//############################################################################## +// +// Special plugins +// +//############################################################################## + +/** + * @brief Analysis code that is called before the main analyze() function. + * @param i_chip An OMIC chip. + * @param io_sc The step code data struct. + * @param o_analyzed True if analysis is done on this chip, false otherwise. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ +int32_t PreAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + bool & o_analyzed ) +{ + // Check for a channel failure before analyzing this chip. + o_analyzed = MemUtils::analyzeChnlFail<TYPE_OMIC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_omic, PreAnalysis ); + +/** + * @brief Plugin function called after analysis is complete but before PRD + * exits. + * @param i_chip An OMIC chip. + * @param io_sc The step code data struct. + * @note This is especially useful for any analysis that still needs to be + * done after the framework clears the FIR bits that were at attention. + * @return SUCCESS. + */ +int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + // If there was a channel failure some cleanup is required to ensure + // there are no more attentions from this channel. 
+ MemUtils::cleanupChnlFail<TYPE_OMIC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_omic, PostAnalysis ); + +//############################################################################## +// +// OMIDLFIR +// +//############################################################################## + +/** + * @brief OMIDLFIR[0|20|40] - OMI-DL Fatal Error + * @param i_chip An OMIC chip. + * @param io_sc The step code data struct. + * @param i_dl The DL relative to the OMIC. + * @return PRD_SCAN_COMM_REGISTER_ZERO for the bus callout, else SUCCESS + */ +int32_t DlFatalError( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + uint8_t i_dl ) +{ + #define PRDF_FUNC "[axone_omic::DlFatalError] " + + int32_t rc = SUCCESS; + + do + { + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of these errors. 
+ io_sc.service_data->setPrimaryAttnType( UNIT_CS ); + + char reg[64]; + sprintf( reg, "DL%d_ERROR_HOLD", i_dl ); + + // Check DL#_ERROR_HOLD[52:63] to determine callout + SCAN_COMM_REGISTER_CLASS * dl_error_hold = i_chip->getRegister( reg ); + + if ( SUCCESS != dl_error_hold->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() Failed on DL%d_ERROR_HOLD: " + "i_chip=0x%08x", i_dl, i_chip->getHuid() ); + break; + } + + if ( dl_error_hold->IsBitSet(53) || + dl_error_hold->IsBitSet(55) || + dl_error_hold->IsBitSet(57) || + dl_error_hold->IsBitSet(58) || + dl_error_hold->IsBitSet(59) || + dl_error_hold->IsBitSet(60) || + dl_error_hold->IsBitSet(62) || + dl_error_hold->IsBitSet(63) ) + { + // Get and callout the OMI target + TargetHandle_t omi = getConnectedChild( i_chip->getTrgt(), TYPE_OMI, + i_dl ); + io_sc.service_data->SetCallout( omi ); + } + else if ( dl_error_hold->IsBitSet(54) || + dl_error_hold->IsBitSet(56) || + dl_error_hold->IsBitSet(61) ) + { + // callout the OMI target, the OMI bus, and the OCMB + // Return PRD_SCAN_COMM_REGISTER_ZERO so the rule code makes + // the appropriate callout. 
+ rc = PRD_SCAN_COMM_REGISTER_ZERO; + } + + }while(0); + + return rc; + + #undef PRDF_FUNC +} + +#define DL_FATAL_ERROR_PLUGIN( POS ) \ +int32_t DlFatalError_##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + return DlFatalError( i_chip, io_sc, POS ); \ +} \ +PRDF_PLUGIN_DEFINE( axone_omic, DlFatalError_##POS ); + +DL_FATAL_ERROR_PLUGIN( 0 ); +DL_FATAL_ERROR_PLUGIN( 1 ); +DL_FATAL_ERROR_PLUGIN( 2 ); + +} // end namespace axone_omic + +} // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk b/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk index ea76f9121..24acb5bb6 100644 --- a/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk +++ b/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -37,5 +37,7 @@ prd_incpath += ${PRD_SRC_PATH}/common/plat/axone # Object files common to both FSP and Hostboot ################################################################################ -# plat/cumulus/ (rule plugin related) +# plat/axone/ (rule plugin related) +prd_rule_plugin += prdfMccPlugins.o +prd_rule_plugin += prdfOmicPlugins.o diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule index 50a0170c2..027a0c08c 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -50,3 +50,20 @@ capture group default; }; + ############################################################################ + # PCB Slave Error Regs + ############################################################################ + + register MC_ERROR_REG + { + name "MC PCB Slave error reg"; + scomaddr 0x070F001F; + capture group PllFIRs; + }; + + register MC_CONFIG_REG + { + name "MC PCB Slave config reg"; + scomaddr 0x070F001E; + capture group PllFIRs; + }; diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule index 8c950bbc7..7275e26a3 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule @@ -469,12 +469,12 @@ group gIOOLFIR /** IOOLFIR[8] * link0 nak received */ - (rIOOLFIR, bit(8)) ? defaultMaskedError; + (rIOOLFIR, bit(8)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[9] * link1 nak received */ - (rIOOLFIR, bit(9)) ? defaultMaskedError; + (rIOOLFIR, bit(9)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[10] * link0 replay buffer full @@ -499,22 +499,22 @@ group gIOOLFIR /** IOOLFIR[14] * link0 sl ecc correctable */ - (rIOOLFIR, bit(14)) ? threshold_and_mask_self; + (rIOOLFIR, bit(14)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[15] * link1 sl ecc correctable */ - (rIOOLFIR, bit(15)) ? threshold_and_mask_self; + (rIOOLFIR, bit(15)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[16] * link0 sl ecc ue */ - (rIOOLFIR, bit(16)) ? threshold_and_mask_self; + (rIOOLFIR, bit(16)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[17] * link1 sl ecc ue */ - (rIOOLFIR, bit(17)) ? threshold_and_mask_self; + (rIOOLFIR, bit(17)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[18] * link0 retrain threshold @@ -597,12 +597,12 @@ group gIOOLFIR (rIOOLFIR, bit(33)) ? defaultMaskedError; /** IOOLFIR[34] - * link0 num replay + * link0 num replay or no forward progress */ (rIOOLFIR, bit(34)) ? 
defaultMaskedError; /** IOOLFIR[35] - * link1 num replay + * link1 num replay or no forward progress */ (rIOOLFIR, bit(35)) ? defaultMaskedError; @@ -619,12 +619,12 @@ group gIOOLFIR /** IOOLFIR[38] * link0 prbs select error */ - (rIOOLFIR, bit(38)) ? threshold_and_mask_self; + (rIOOLFIR, bit(38)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[39] * link1 prbs select error */ - (rIOOLFIR, bit(39)) ? threshold_and_mask_self; + (rIOOLFIR, bit(39)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[40] * link0 tcomplete bad @@ -639,102 +639,102 @@ group gIOOLFIR /** IOOLFIR[42] * link0 no spare lane available */ - (rIOOLFIR, bit(42)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(42)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[43] * link1 no spare lane available */ - (rIOOLFIR, bit(43)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(43)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[44] - * link0 spare done + * link0 spare done or degraded mode */ - (rIOOLFIR, bit(44)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(44)) ? spare_lane_degraded_mode_L0; /** IOOLFIR[45] - * link1 spare done + * link1 spare done or degraded mode */ - (rIOOLFIR, bit(45)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(45)) ? spare_lane_degraded_mode_L1; /** IOOLFIR[46] * link0 too many crc errors */ - (rIOOLFIR, bit(46)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(46)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[47] * link1 too many crc errors */ - (rIOOLFIR, bit(47)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(47)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[48] - * link0 npu error + * link0 npu error or orx otx dlx errors */ (rIOOLFIR, bit(48)) ? threshold_and_mask_self; /** IOOLFIR[49] - * link1 npu error + * link1 npu error or orx otx dlx errors */ (rIOOLFIR, bit(49)) ? threshold_and_mask_self; /** IOOLFIR[50] * linkx npu error */ - (rIOOLFIR, bit(50)) ? threshold_and_mask_self; + (rIOOLFIR, bit(50)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[51] * osc switch */ - (rIOOLFIR, bit(51)) ? 
threshold_and_mask_self; + (rIOOLFIR, bit(51)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[52] * link0 correctable array error */ - (rIOOLFIR, bit(52)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(52)) ? self_th_32perDay; /** IOOLFIR[53] * link1 correctable array error */ - (rIOOLFIR, bit(53)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(53)) ? self_th_32perDay; /** IOOLFIR[54] * link0 uncorrectable array error */ - (rIOOLFIR, bit(54)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(54)) ? self_th_1; /** IOOLFIR[55] * link1 uncorrectable array error */ - (rIOOLFIR, bit(55)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(55)) ? self_th_1; /** IOOLFIR[56] * link0 training failed */ - (rIOOLFIR, bit(56)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(56)) ? training_failure_L0; /** IOOLFIR[57] * link1 training failed */ - (rIOOLFIR, bit(57)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(57)) ? training_failure_L1; /** IOOLFIR[58] * link0 unrecoverable error */ - (rIOOLFIR, bit(58)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(58)) ? unrecoverable_error_L0; /** IOOLFIR[59] * link1 unrecoverable error */ - (rIOOLFIR, bit(59)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(59)) ? unrecoverable_error_L1; /** IOOLFIR[60] * link0 internal error */ - (rIOOLFIR, bit(60)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(60)) ? internal_error_L0; /** IOOLFIR[61] * link1 internal error */ - (rIOOLFIR, bit(61)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(61)) ? internal_error_L1; /** IOOLFIR[62] * fir scom err dup diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule index 9c8dcce38..88c917458 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -212,7 +212,7 @@ group gPHBNFIR /** PHBNFIR[0] * BAR Parity Error */ - (rPHBNFIR, bit(0)) ? self_th_1; + (rPHBNFIR, bit(0)) ? parent_proc_th_1; /** PHBNFIR[1] * Parity Errors on Registers besides BAR @@ -252,12 +252,12 @@ group gPHBNFIR /** PHBNFIR[8] * Register Array Parity Error */ - (rPHBNFIR, bit(8)) ? self_th_1; + (rPHBNFIR, bit(8)) ? parent_proc_th_1; /** PHBNFIR[9] * Power Bus Interface Parity Error */ - (rPHBNFIR, bit(9)) ? self_th_1; + (rPHBNFIR, bit(9)) ? parent_proc_th_1; /** PHBNFIR[10] * Power Bus Data Hang @@ -297,7 +297,7 @@ group gPHBNFIR /** PHBNFIR[17] * Hardware Error */ - (rPHBNFIR, bit(17)) ? self_th_1; + (rPHBNFIR, bit(17)) ? parent_proc_th_1; /** PHBNFIR[18] * Unsolicited Power Bus Data diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule index 187cd2a44..ae8e6bb80 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -2893,7 +2893,7 @@ group gNXCQFIR /** NXCQFIR[19] * Uncorrectable error on ERAT arrays */ - (rNXCQFIR, bit(19)) ? nx_th_32perDay; + (rNXCQFIR, bit(19)) ? nx_th_1; /** NXCQFIR[20] * SUE on ERAT arrays diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule index 26d62e95f..91298d653 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -23,6 +23,12 @@ # # IBM_PROLOG_END_TAG +################################################################################ +# Analyze +################################################################################ + +actionclass analyzeENHCAFIR { analyze(gENHCAFIR); }; + ############################################################################### # Analyze connected ############################################################################### diff --git a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule index 1abd08c96..c1e5c15a8 100644 --- a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule +++ b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -44,82 +44,82 @@ chip explorer_ocmb ############################################################################# ############################################################################ - # MB Chiplet FIR + # OCMB Chiplet FIR ############################################################################ - register MB_CHIPLET_CS_FIR + register OCMB_CHIPLET_CS_FIR { - name "MB Chiplet Checkstop FIR"; + name "OCMB Chiplet Checkstop FIR"; scomaddr 0x08040000; capture group default; }; - register MB_CHIPLET_RE_FIR + register OCMB_CHIPLET_RE_FIR { - name "MB Chiplet Recoverable FIR"; + name "OCMB Chiplet Recoverable FIR"; scomaddr 0x08040001; capture group default; }; - register MB_CHIPLET_FIR_MASK + register OCMB_CHIPLET_FIR_MASK { - name "MB Chiplet FIR MASK"; + name "OCMB Chiplet FIR MASK"; scomaddr 0x08040002; capture group default; }; ############################################################################ - # MB Chiplet Special Attention FIR + # OCMB Chiplet Special Attention FIR 
############################################################################ - register MB_CHIPLET_SPA_FIR + register OCMB_CHIPLET_SPA_FIR { - name "MB Chiplet Special Attention FIR"; + name "OCMB Chiplet Special Attention FIR"; scomaddr 0x08040004; capture group default; }; - register MB_CHIPLET_SPA_FIR_MASK + register OCMB_CHIPLET_SPA_FIR_MASK { - name "MB Chiplet Special Attention FIR MASK"; + name "OCMB Chiplet Special Attention FIR MASK"; scomaddr 0x08040007; capture group default; }; ############################################################################ - # Explorer chip MB_LFIR + # Explorer chip OCMB_LFIR ############################################################################ - register MB_LFIR + register OCMB_LFIR { - name "Explorer chip MB_LFIR"; + name "Explorer chip OCMB_LFIR"; scomaddr 0x0804000a; reset (&, 0x0804000b); mask (|, 0x0804000f); capture group default; }; - register MB_LFIR_MASK + register OCMB_LFIR_MASK { - name "Explorer chip MB_LFIR MASK"; + name "Explorer chip OCMB_LFIR MASK"; scomaddr 0x0804000d; capture group default; }; - register MB_LFIR_ACT0 + register OCMB_LFIR_ACT0 { - name "Explorer chip MB_LFIR ACT0"; + name "Explorer chip OCMB_LFIR ACT0"; scomaddr 0x08040010; capture group default; - capture req nonzero("MB_LFIR"); + capture req nonzero("OCMB_LFIR"); }; - register MB_LFIR_ACT1 + register OCMB_LFIR_ACT1 { - name "Explorer chip MB_LFIR ACT1"; + name "Explorer chip OCMB_LFIR ACT1"; scomaddr 0x08040011; capture group default; - capture req nonzero("MB_LFIR"); + capture req nonzero("OCMB_LFIR"); }; ############################################################################ @@ -355,174 +355,261 @@ chip explorer_ocmb ############################################################################## ################################################################################ -# MB Chiplet FIR +# OCMB Chiplet FIR ################################################################################ -rule rMB_CHIPLET_FIR +rule 
rOCMB_CHIPLET_FIR { UNIT_CS: - MB_CHIPLET_CS_FIR & ~MB_CHIPLET_FIR_MASK & `1fffffffffffffff`; + OCMB_CHIPLET_CS_FIR & ~OCMB_CHIPLET_FIR_MASK & `1fffffffffffffff`; RECOVERABLE: - (MB_CHIPLET_RE_FIR >> 2) & ~MB_CHIPLET_FIR_MASK & `1fffffffffffffff`; + (OCMB_CHIPLET_RE_FIR >> 2) & ~OCMB_CHIPLET_FIR_MASK & `1fffffffffffffff`; }; -group gMB_CHIPLET_FIR attntype CHECK_STOP, RECOVERABLE +# NOTE: RDFFIR[14|34] are possible side effects of OCMB_LFIR[38], as such, +# OCMB_LFIR must be analyzed first for correct handling. If changes are +# made so the RDFFIR is analyzed first, additional changes to the handling +# of those bits will be required. +group gOCMB_CHIPLET_FIR attntype UNIT_CS, RECOVERABLE filter singlebit { - /** MB_CHIPLET_FIR[3] - * Attention from MB_LFIR + /** OCMB_CHIPLET_FIR[3] + * Attention from OCMB_LFIR */ - (rMB_CHIPLET_FIR, bit(3)) ? analyzeMB_LFIR; + (rOCMB_CHIPLET_FIR, bit(3)) ? analyzeOCMB_LFIR; - /** MB_CHIPLET_FIR[4] + /** OCMB_CHIPLET_FIR[4] * Attention from MMIOFIR */ - (rMB_CHIPLET_FIR, bit(4)) ? analyzeMMIOFIR; + (rOCMB_CHIPLET_FIR, bit(4)) ? analyzeMMIOFIR; - /** MB_CHIPLET_FIR[7] + /** OCMB_CHIPLET_FIR[7] * Attention from SRQFIR */ - (rMB_CHIPLET_FIR, bit(7)) ? analyzeSRQFIR; + (rOCMB_CHIPLET_FIR, bit(7)) ? analyzeSRQFIR; - /** MB_CHIPLET_FIR[8] + /** OCMB_CHIPLET_FIR[8] * Attention from MCBISTFIR */ - (rMB_CHIPLET_FIR, bit(8)) ? analyzeMCBISTFIR; + (rOCMB_CHIPLET_FIR, bit(8)) ? analyzeMCBISTFIR; - /** MB_CHIPLET_FIR[9] + /** OCMB_CHIPLET_FIR[9] * Attention from RDFFIR */ - (rMB_CHIPLET_FIR, bit(9)) ? analyzeRDFFIR; + (rOCMB_CHIPLET_FIR, bit(9)) ? analyzeRDFFIR; - /** MB_CHIPLET_FIR[11] + /** OCMB_CHIPLET_FIR[11] * Attention from TLXFIR */ - (rMB_CHIPLET_FIR, bit(11)) ? analyzeTLXFIR; + (rOCMB_CHIPLET_FIR, bit(11)) ? analyzeTLXFIR; - /** MB_CHIPLET_FIR[12] + /** OCMB_CHIPLET_FIR[12] * Attention from OMIDLFIR */ - (rMB_CHIPLET_FIR, bit(12)) ? analyzeOMIDLFIR; + (rOCMB_CHIPLET_FIR, bit(12)) ? 
analyzeOMIDLFIR; }; ################################################################################ -# MB Chiplet Special Attention FIR +# OCMB Chiplet Special Attention FIR ################################################################################ -rule rMB_CHIPLET_SPA_FIR +rule rOCMB_CHIPLET_SPA_FIR { HOST_ATTN: - MB_CHIPLET_SPA_FIR & ~MB_CHIPLET_SPA_FIR_MASK; + OCMB_CHIPLET_SPA_FIR & ~OCMB_CHIPLET_SPA_FIR_MASK; }; -group gMB_CHIPLET_SPA_FIR attntype HOST_ATTN +group gOCMB_CHIPLET_SPA_FIR attntype HOST_ATTN filter singlebit { - /** MB_CHIPLET_SPA_FIR[1] + /** OCMB_CHIPLET_SPA_FIR[1] * Attention from MMIOFIR */ - (rMB_CHIPLET_SPA_FIR, bit(1)) ? analyzeMMIOFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(1)) ? analyzeMMIOFIR; - /** MB_CHIPLET_SPA_FIR[4] + /** OCMB_CHIPLET_SPA_FIR[4] * Attention from SRQFIR */ - (rMB_CHIPLET_SPA_FIR, bit(4)) ? analyzeSRQFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(4)) ? analyzeSRQFIR; - /** MB_CHIPLET_SPA_FIR[5] + /** OCMB_CHIPLET_SPA_FIR[5] * Attention from MCBISTFIR */ - (rMB_CHIPLET_SPA_FIR, bit(5)) ? analyzeMCBISTFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(5)) ? analyzeMCBISTFIR; - /** MB_CHIPLET_SPA_FIR[6] + /** OCMB_CHIPLET_SPA_FIR[6] * Attention from RDFFIR */ - (rMB_CHIPLET_SPA_FIR, bit(6)) ? analyzeRDFFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(6)) ? analyzeRDFFIR; - /** MB_CHIPLET_SPA_FIR[8] + /** OCMB_CHIPLET_SPA_FIR[8] * Attention from TLXFIR */ - (rMB_CHIPLET_SPA_FIR, bit(8)) ? analyzeTLXFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(8)) ? analyzeTLXFIR; - /** MB_CHIPLET_SPA_FIR[9] + /** OCMB_CHIPLET_SPA_FIR[9] * Attention from OMIDLFIR */ - (rMB_CHIPLET_SPA_FIR, bit(9)) ? analyzeOMIDLFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(9)) ? 
analyzeOMIDLFIR; }; ################################################################################ -# Explorer chip MB_LFIR +# Explorer chip OCMB_LFIR ################################################################################ -rule rMB_LFIR +rule rOCMB_LFIR { UNIT_CS: - MB_LFIR & ~MB_LFIR_MASK & ~MB_LFIR_ACT0 & ~MB_LFIR_ACT1; + OCMB_LFIR & ~OCMB_LFIR_MASK & ~OCMB_LFIR_ACT0 & ~OCMB_LFIR_ACT1; RECOVERABLE: - MB_LFIR & ~MB_LFIR_MASK & ~MB_LFIR_ACT0 & MB_LFIR_ACT1; - HOST_ATTN: - MB_LFIR & ~MB_LFIR_MASK & MB_LFIR_ACT0 & ~MB_LFIR_ACT1; + OCMB_LFIR & ~OCMB_LFIR_MASK & ~OCMB_LFIR_ACT0 & OCMB_LFIR_ACT1; }; -group gMB_LFIR +group gOCMB_LFIR filter singlebit, cs_root_cause { - /** MB_LFIR[0] + /** OCMB_LFIR[0] * CFIR access PCB error */ - (rMB_LFIR, bit(0)) ? defaultMaskedError; + (rOCMB_LFIR, bit(0)) ? self_th_32perDay; - /** MB_LFIR[1] + /** OCMB_LFIR[1] * CFIR internal parity error */ - (rMB_LFIR, bit(1)) ? defaultMaskedError; + (rOCMB_LFIR, bit(1)) ? self_th_32perDay; - /** MB_LFIR[2] + /** OCMB_LFIR[2] * LFIR internal parity error */ - (rMB_LFIR, bit(2)) ? defaultMaskedError; + (rOCMB_LFIR, bit(2)) ? self_th_32perDay; - /** MB_LFIR[3] + /** OCMB_LFIR[3] * Debug scom satellite error */ - (rMB_LFIR, bit(3)) ? defaultMaskedError; + (rOCMB_LFIR, bit(3)) ? defaultMaskedError; - /** MB_LFIR[4] + /** OCMB_LFIR[4] * PSCOM Logic: PCB Access Error */ - (rMB_LFIR, bit(4)) ? defaultMaskedError; + (rOCMB_LFIR, bit(4)) ? defaultMaskedError; - /** MB_LFIR[5] + /** OCMB_LFIR[5] * PSCOM Logic: Summarized internal errors */ - (rMB_LFIR, bit(5)) ? defaultMaskedError; + (rOCMB_LFIR, bit(5)) ? defaultMaskedError; - /** MB_LFIR[6] + /** OCMB_LFIR[6] * Trace Logic : Scom Satellite Error - Trace0 */ - (rMB_LFIR, bit(6)) ? defaultMaskedError; + (rOCMB_LFIR, bit(6)) ? defaultMaskedError; - /** MB_LFIR[7] + /** OCMB_LFIR[7] * Trace Logic : Scom Satellite Error - Trace1 */ - (rMB_LFIR, bit(7)) ? defaultMaskedError; + (rOCMB_LFIR, bit(7)) ? 
defaultMaskedError; - /** MB_LFIR[8] - * unused + /** OCMB_LFIR[8] + * PIB2GIF parity error on FSM or Registers */ - (rMB_LFIR, bit(8)) ? defaultMaskedError; + (rOCMB_LFIR, bit(8)) ? self_th_32perDay; - /** MB_LFIR[9] + /** OCMB_LFIR[9] * MSG access PCB error */ - (rMB_LFIR, bit(9)) ? defaultMaskedError; + (rOCMB_LFIR, bit(9)) ? defaultMaskedError; + + /** OCMB_LFIR[10:18] + * unused + */ + (rOCMB_LFIR, bit(10|11|12|13|14|15|16|17|18)) ? defaultMaskedError; + + /** OCMB_LFIR[19] + * DLL IRQ + */ + (rOCMB_LFIR, bit(19)) ? defaultMaskedError; + + /** OCMB_LFIR[20] + * Watchdog timer interrupt + */ + (rOCMB_LFIR, bit(20)) ? self_th_1; + + /** OCMB_LFIR[21] + * internal temp sensor tripped a threshold + */ + (rOCMB_LFIR, bit(21)) ? defaultMaskedError; + + /** OCMB_LFIR[22] + * GPBC_FATAL_ERROR + */ + (rOCMB_LFIR, bit(22)) ? self_th_1; + + /** OCMB_LFIR[23] + * GPBC_NON_FATAL_ERROR + */ + (rOCMB_LFIR, bit(23)) ? self_th_1; + + /** OCMB_LFIR[24] + * early power off warning + */ + (rOCMB_LFIR, bit(24)) ? defaultMaskedError; + + /** OCMB_LFIR[25] + * TOP fatal interrupts + */ + (rOCMB_LFIR, bit(25)) ? self_th_1; + + /** OCMB_LFIR[26] + * TOP non fatal interrupts + */ + (rOCMB_LFIR, bit(26)) ? level2_M_self_L_th_1; + + /** OCMB_LFIR[27:34] + * Interrupt from OPSe to OCMB + */ + (rOCMB_LFIR, bit(27|28|29|30|31|32|33|34)) ? defaultMaskedError; + + /** OCMB_LFIR[35] + * DDR thermal event + */ + (rOCMB_LFIR, bit(35)) ? defaultMaskedError; + + /** OCMB_LFIR[36] + * DDR4 PHY fatal + */ + (rOCMB_LFIR, bit(36)) ? self_th_1; + + /** OCMB_LFIR[37] + * DDR4 PHY non fatal + */ + (rOCMB_LFIR, bit(37)) ? self_th_32perDay; + + /** OCMB_LFIR[38] + * DDR4 PHY interrupt + */ + (rOCMB_LFIR, bit(38)) ? 
ddr4_phy_interrupt; - /** MB_LFIR[10:62] - * bits from the microsemi message register (0 to 52) + /** OCMB_LFIR[39:46] + * foxhound fatal */ - (rMB_LFIR, bit(10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61|62)) ? defaultMaskedError; + (rOCMB_LFIR, bit(39|40|41|42|43|44|45|46)) ? foxhound_fatal; + + /** OCMB_LFIR[47:54] + * foxhound non fatal + */ + (rOCMB_LFIR, bit(47|48|49|50|51|52|53|54)) ? defaultMaskedError; + + /** OCMB_LFIR[55:62] + * foxhound serdes interrupt + */ + (rOCMB_LFIR, bit(55|56|57|58|59|60|61|62)) ? defaultMaskedError; + + /** OCMB_LFIR[63] + * GIF2PCB parity error on FSM or Registers + */ + (rOCMB_LFIR, bit(63)) ? self_th_32perDay; }; @@ -557,27 +644,27 @@ group gMMIOFIR /** MMIOFIR[2] * SCOM err */ - (rMMIOFIR, bit(2)) ? defaultMaskedError; + (rMMIOFIR, bit(2)) ? self_th_32perDay; /** MMIOFIR[3] - * FSM err + * FSM perr */ - (rMMIOFIR, bit(3)) ? defaultMaskedError; + (rMMIOFIR, bit(3)) ? self_th_1; /** MMIOFIR[4] * FIFO overflow */ - (rMMIOFIR, bit(4)) ? defaultMaskedError; + (rMMIOFIR, bit(4)) ? self_th_1; /** MMIOFIR[5] * Ctl reg parity err */ - (rMMIOFIR, bit(5)) ? defaultMaskedError; + (rMMIOFIR, bit(5)) ? self_th_1; /** MMIOFIR[6] * Info reg parity error */ - (rMMIOFIR, bit(6)) ? defaultMaskedError; + (rMMIOFIR, bit(6)) ? self_th_1; /** MMIOFIR[7] * SNSC both starts err @@ -622,22 +709,22 @@ rule rSRQFIR group gSRQFIR filter singlebit, - cs_root_cause + cs_root_cause(18) { /** SRQFIR[0] * SRQ recoverable error */ - (rSRQFIR, bit(0)) ? defaultMaskedError; + (rSRQFIR, bit(0)) ? mem_port_th_1; /** SRQFIR[1] * SRQ nonrecoverable error */ - (rSRQFIR, bit(1)) ? defaultMaskedError; + (rSRQFIR, bit(1)) ? mem_port_th_1; /** SRQFIR[2] * Refresh overrun */ - (rSRQFIR, bit(2)) ? defaultMaskedError; + (rSRQFIR, bit(2)) ? 
mem_port_th_32perDay; /** SRQFIR[3] * WAT error @@ -647,12 +734,12 @@ group gSRQFIR /** SRQFIR[4] * RCD parity error */ - (rSRQFIR, bit(4)) ? defaultMaskedError; + (rSRQFIR, bit(4)) ? srq_rcd_parity_error; /** SRQFIR[5] * MCB logic error */ - (rSRQFIR, bit(5)) ? defaultMaskedError; + (rSRQFIR, bit(5)) ? mem_port_th_1; /** SRQFIR[6] * Emergency throttle @@ -662,7 +749,7 @@ group gSRQFIR /** SRQFIR[7] * NCF MCB parity error */ - (rSRQFIR, bit(7)) ? defaultMaskedError; + (rSRQFIR, bit(7)) ? mem_port_th_1; /** SRQFIR[8] * DDR MBA event n @@ -672,82 +759,82 @@ group gSRQFIR /** SRQFIR[9] * WRQ RRQ hang err */ - (rSRQFIR, bit(9)) ? defaultMaskedError; + (rSRQFIR, bit(9)) ? mem_port_th_1; /** SRQFIR[10] * SM one hot error */ - (rSRQFIR, bit(10)) ? defaultMaskedError; + (rSRQFIR, bit(10)) ? mem_port_th_1; /** SRQFIR[11] * Reg parity error */ - (rSRQFIR, bit(11)) ? defaultMaskedError; + (rSRQFIR, bit(11)) ? mem_port_th_1; /** SRQFIR[12] * Cmd parity error */ - (rSRQFIR, bit(12)) ? defaultMaskedError; + (rSRQFIR, bit(12)) ? mem_port_th_1; /** SRQFIR[13] * Port fail */ - (rSRQFIR, bit(13)) ? defaultMaskedError; + (rSRQFIR, bit(13)) ? mem_port_failure; /** SRQFIR[14] - * Spare + * informational register parity error bit */ - (rSRQFIR, bit(14)) ? defaultMaskedError; + (rSRQFIR, bit(14)) ? threshold_and_mask_mem_port; /** SRQFIR[15] * Debug parity error */ - (rSRQFIR, bit(15)) ? defaultMaskedError; + (rSRQFIR, bit(15)) ? threshold_and_mask_mem_port; /** SRQFIR[16] * WDF unrecoverable mainline error */ - (rSRQFIR, bit(16)) ? defaultMaskedError; + (rSRQFIR, bit(16)) ? mem_port_th_1; /** SRQFIR[17] * WDF mmio error */ - (rSRQFIR, bit(17)) ? defaultMaskedError; + (rSRQFIR, bit(17)) ? mem_port_th_1; /** SRQFIR[18] * WDF array UE on mainline operations (SUE put in mem) */ - (rSRQFIR, bit(18)) ? defaultMaskedError; + (rSRQFIR, bit(18)) ? mem_port_th_1_UERE; /** SRQFIR[19] * WDF mainline dataflow error (SUE not reliably put in mem) */ - (rSRQFIR, bit(19)) ? 
defaultMaskedError; + (rSRQFIR, bit(19)) ? mem_port_th_1; /** SRQFIR[20] * WDF scom register parity err, affecting mainline config */ - (rSRQFIR, bit(20)) ? defaultMaskedError; + (rSRQFIR, bit(20)) ? mem_port_th_1; /** SRQFIR[21] * WDF scom register parity err, affecting scom ops only */ - (rSRQFIR, bit(21)) ? defaultMaskedError; + (rSRQFIR, bit(21)) ? mem_port_th_1; /** SRQFIR[22] * WDF SCOM fsm parity error */ - (rSRQFIR, bit(22)) ? defaultMaskedError; + (rSRQFIR, bit(22)) ? mem_port_th_1; /** SRQFIR[23] * WDF write buffer array CE */ - (rSRQFIR, bit(23)) ? defaultMaskedError; + (rSRQFIR, bit(23)) ? mem_port_th_32perDay; /** SRQFIR[24] * NCF UE */ - (rSRQFIR, bit(24)) ? defaultMaskedError; + (rSRQFIR, bit(24)) ? mem_port_th_1; /** SRQFIR[25] * TBD @@ -757,17 +844,17 @@ group gSRQFIR /** SRQFIR[26] * NCF logic error */ - (rSRQFIR, bit(26)) ? defaultMaskedError; + (rSRQFIR, bit(26)) ? mem_port_th_1; /** SRQFIR[27] * NCF parity error */ - (rSRQFIR, bit(27)) ? defaultMaskedError; + (rSRQFIR, bit(27)) ? mem_port_th_1; /** SRQFIR[28] * NCF correctable error */ - (rSRQFIR, bit(28)) ? defaultMaskedError; + (rSRQFIR, bit(28)) ? mem_port_th_32perDay; /** SRQFIR[29] * Internal scom error @@ -807,17 +894,17 @@ group gMCBISTFIR /** MCBISTFIR[1] * Command address timeout */ - (rMCBISTFIR, bit(1)) ? defaultMaskedError; + (rMCBISTFIR, bit(1)) ? self_th_1; /** MCBISTFIR[2] * Internal FSM error */ - (rMCBISTFIR, bit(2)) ? defaultMaskedError; + (rMCBISTFIR, bit(2)) ? self_th_1; /** MCBISTFIR[3] * MCBIST broadcast out of sync */ - (rMCBISTFIR, bit(3)) ? defaultMaskedError; + (rMCBISTFIR, bit(3)) ? self_th_1; /** MCBISTFIR[4] * MCBIST data error @@ -852,7 +939,7 @@ group gMCBISTFIR /** MCBISTFIR[10] * MCBIST program complete */ - (rMCBISTFIR, bit(10)) ? defaultMaskedError; + (rMCBISTFIR, bit(10)) ? mcbist_program_complete; /** MCBISTFIR[11] * MCBIST CCS subtest done @@ -865,14 +952,14 @@ group gMCBISTFIR (rMCBISTFIR, bit(12)) ? 
defaultMaskedError; /** MCBISTFIR[13] - * SCOM recoverable reg parity error + * SCOM recoverable register parity error */ - (rMCBISTFIR, bit(13)) ? defaultMaskedError; + (rMCBISTFIR, bit(13)) ? self_th_1; /** MCBISTFIR[14] * SCOM fatal reg parity error */ - (rMCBISTFIR, bit(14)) ? defaultMaskedError; + (rMCBISTFIR, bit(14)) ? self_th_1; /** MCBISTFIR[15] * SCOM WAT and debug reg parity error @@ -917,57 +1004,57 @@ rule rRDFFIR group gRDFFIR filter singlebit, - cs_root_cause + cs_root_cause(14,15,17,35,37) { /** RDFFIR[0] * Mainline read MPE on rank 0 */ - (rRDFFIR, bit(0)) ? defaultMaskedError; + (rRDFFIR, bit(0)) ? verify_chip_mark_0; /** RDFFIR[1] * Mainline read MPE on rank 1 */ - (rRDFFIR, bit(1)) ? defaultMaskedError; + (rRDFFIR, bit(1)) ? verify_chip_mark_1; /** RDFFIR[2] * Mainline read MPE on rank 2 */ - (rRDFFIR, bit(2)) ? defaultMaskedError; + (rRDFFIR, bit(2)) ? verify_chip_mark_2; /** RDFFIR[3] - * Maineline read MPE on rank 3 + * Mainline read MPE on rank 3 */ - (rRDFFIR, bit(3)) ? defaultMaskedError; + (rRDFFIR, bit(3)) ? verify_chip_mark_3; /** RDFFIR[4] * Mainline read MPE on rank 4 */ - (rRDFFIR, bit(4)) ? defaultMaskedError; + (rRDFFIR, bit(4)) ? verify_chip_mark_4; /** RDFFIR[5] * Mainline read MPE on rank 5 */ - (rRDFFIR, bit(5)) ? defaultMaskedError; + (rRDFFIR, bit(5)) ? verify_chip_mark_5; /** RDFFIR[6] * Mainline read MPE on rank 6 */ - (rRDFFIR, bit(6)) ? defaultMaskedError; + (rRDFFIR, bit(6)) ? verify_chip_mark_6; /** RDFFIR[7] * Mainline read MPE on rank 7 */ - (rRDFFIR, bit(7)) ? defaultMaskedError; + (rRDFFIR, bit(7)) ? verify_chip_mark_7; /** RDFFIR[8] * Mainline read NCE */ - (rRDFFIR, bit(8)) ? defaultMaskedError; + (rRDFFIR, bit(8)) ? mainline_nce_tce_handling; /** RDFFIR[9] * Mainline read TCE */ - (rRDFFIR, bit(9)) ? defaultMaskedError; + (rRDFFIR, bit(9)) ? mainline_nce_tce_handling; /** RDFFIR[10] * Mainline read SCE @@ -987,27 +1074,27 @@ group gRDFFIR /** RDFFIR[13] * Mainline read AUE */ - (rRDFFIR, bit(13)) ? 
defaultMaskedError; + (rRDFFIR, bit(13)) ? mainline_aue_iaue_handling; /** RDFFIR[14] * Mainline read UE */ - (rRDFFIR, bit(14)) ? defaultMaskedError; + (rRDFFIR, bit(14)) ? mainline_ue_handling_UERE; /** RDFFIR[15] * Mainline read RCD */ - (rRDFFIR, bit(15)) ? defaultMaskedError; + (rRDFFIR, bit(15)) ? rdf_rcd_parity_error_UERE; /** RDFFIR[16] * Mainline read IAUE */ - (rRDFFIR, bit(16)) ? defaultMaskedError; + (rRDFFIR, bit(16)) ? mainline_aue_iaue_handling; /** RDFFIR[17] * Mainline read IUE */ - (rRDFFIR, bit(17)) ? defaultMaskedError; + (rRDFFIR, bit(17)) ? mainline_iue_handling; /** RDFFIR[18] * Mainline read IRCD @@ -1017,7 +1104,7 @@ group gRDFFIR /** RDFFIR[19] * Mainline read IMPE */ - (rRDFFIR, bit(19)) ? defaultMaskedError; + (rRDFFIR, bit(19)) ? memory_impe_handling; /** RDFFIR[20:27] * Maintenance MPE @@ -1052,7 +1139,7 @@ group gRDFFIR /** RDFFIR[33] * Maintenance AUE */ - (rRDFFIR, bit(33)) ? defaultMaskedError; + (rRDFFIR, bit(33)) ? maintenance_aue_handling; /** RDFFIR[34] * Maintenance UE @@ -1062,72 +1149,72 @@ group gRDFFIR /** RDFFIR[35] * Maintenance RCD */ - (rRDFFIR, bit(35)) ? defaultMaskedError; + (rRDFFIR, bit(35)) ? rdf_rcd_parity_error_UERE; /** RDFFIR[36] * Maintenance IAUE */ - (rRDFFIR, bit(36)) ? defaultMaskedError; + (rRDFFIR, bit(36)) ? maintenance_iaue_handling; /** RDFFIR[37] * Maintenance IUE */ - (rRDFFIR, bit(37)) ? defaultMaskedError; + (rRDFFIR, bit(37)) ? maintenance_iue_handling; /** RDFFIR[38] - * Maintenance IRCD + * Maintenance IRCD */ (rRDFFIR, bit(38)) ? defaultMaskedError; /** RDFFIR[39] * Maintenance IMPE */ - (rRDFFIR, bit(39)) ? defaultMaskedError; + (rRDFFIR, bit(39)) ? memory_impe_handling; /** RDFFIR[40] * RDDATA valid error */ - (rRDFFIR, bit(40)) ? defaultMaskedError; + (rRDFFIR, bit(40)) ? mem_port_th_32perDay; /** RDFFIR[41] * SCOM status register parity error */ - (rRDFFIR, bit(41)) ? defaultMaskedError; + (rRDFFIR, bit(41)) ? 
threshold_and_mask_mem_port; /** RDFFIR[42] * SCOM recoverable register parity error */ - (rRDFFIR, bit(42)) ? defaultMaskedError; + (rRDFFIR, bit(42)) ? mem_port_th_1; /** RDFFIR[43] * SCOM unrecoverable register parity error */ - (rRDFFIR, bit(43)) ? defaultMaskedError; + (rRDFFIR, bit(43)) ? mem_port_th_1; /** RDFFIR[44] * ECC corrector internal parity error */ - (rRDFFIR, bit(44)) ? defaultMaskedError; + (rRDFFIR, bit(44)) ? mem_port_th_1; /** RDFFIR[45] * Rd Buff ECC CHK Cor CE DW0 Detected */ - (rRDFFIR, bit(45)) ? defaultMaskedError; + (rRDFFIR, bit(45)) ? mem_port_th_32perDay; /** RDFFIR[46] * Rd Buff ECC CHK Cor CE DW1 Detected */ - (rRDFFIR, bit(46)) ? defaultMaskedError; + (rRDFFIR, bit(46)) ? mem_port_th_32perDay; /** RDFFIR[47] * Rd Buff ECC CHK Cor UE DW0 Detected */ - (rRDFFIR, bit(47)) ? defaultMaskedError; + (rRDFFIR, bit(47)) ? mem_port_th_1; /** RDFFIR[48] * Rd Buff ECC CHK Cor UE DW1 Detected */ - (rRDFFIR, bit(48)) ? defaultMaskedError; + (rRDFFIR, bit(48)) ? mem_port_th_1; /** RDFFIR[49:59] * Reserved @@ -1177,67 +1264,67 @@ group gTLXFIR /** TLXFIR[0] * Info reg parity error */ - (rTLXFIR, bit(0)) ? defaultMaskedError; + (rTLXFIR, bit(0)) ? threshold_and_mask_self; /** TLXFIR[1] * Ctrl reg parity error */ - (rTLXFIR, bit(1)) ? defaultMaskedError; + (rTLXFIR, bit(1)) ? self_th_1; /** TLXFIR[2] * TLX VC0 return credit counter overflow */ - (rTLXFIR, bit(2)) ? defaultMaskedError; + (rTLXFIR, bit(2)) ? omi_bus_th_1; /** TLXFIR[3] * TLX VC1 return credit counter overflow */ - (rTLXFIR, bit(3)) ? defaultMaskedError; + (rTLXFIR, bit(3)) ? omi_bus_th_1; /** TLXFIR[4] * TLX dcp0 return credit counter overflow */ - (rTLXFIR, bit(4)) ? defaultMaskedError; + (rTLXFIR, bit(4)) ? omi_bus_th_1; /** TLXFIR[5] * TLX dcp1 return credit counter overflow */ - (rTLXFIR, bit(5)) ? defaultMaskedError; + (rTLXFIR, bit(5)) ? omi_bus_th_1; /** TLXFIR[6] * TLX credit management block error */ - (rTLXFIR, bit(6)) ? defaultMaskedError; + (rTLXFIR, bit(6)) ? 
self_th_1; /** TLXFIR[7] * TLX credit management block parity error */ - (rTLXFIR, bit(7)) ? defaultMaskedError; + (rTLXFIR, bit(7)) ? self_th_1; /** TLXFIR[8] * TLXT fatal parity error */ - (rTLXFIR, bit(8)) ? defaultMaskedError; + (rTLXFIR, bit(8)) ? self_th_1; /** TLXFIR[9] * TLXT recoverable error */ - (rTLXFIR, bit(9)) ? defaultMaskedError; + (rTLXFIR, bit(9)) ? analyzeTLXERR1; /** TLXFIR[10] * TLXT configuration error */ - (rTLXFIR, bit(10)) ? defaultMaskedError; + (rTLXFIR, bit(10)) ? level2_M_self_L_th_1; /** TLXFIR[11] * TLXT informational parity error */ - (rTLXFIR, bit(11)) ? defaultMaskedError; + (rTLXFIR, bit(11)) ? self_th_1; /** TLXFIR[12] * TLXT hard error */ - (rTLXFIR, bit(12)) ? defaultMaskedError; + (rTLXFIR, bit(12)) ? self_th_1; /** TLXFIR[13:15] * Reserved @@ -1257,47 +1344,47 @@ group gTLXFIR /** TLXFIR[18] * OC malformed */ - (rTLXFIR, bit(18)) ? defaultMaskedError; + (rTLXFIR, bit(18)) ? omi_bus_th_1; /** TLXFIR[19] * OC protocol error */ - (rTLXFIR, bit(19)) ? defaultMaskedError; + (rTLXFIR, bit(19)) ? omi_th_1; /** TLXFIR[20] * Address translate error */ - (rTLXFIR, bit(20)) ? defaultMaskedError; + (rTLXFIR, bit(20)) ? self_th_1; /** TLXFIR[21] * Metadata unc or data parity error */ - (rTLXFIR, bit(21)) ? defaultMaskedError; + (rTLXFIR, bit(21)) ? self_th_1; /** TLXFIR[22] * OC unsupported group 2 */ - (rTLXFIR, bit(22)) ? defaultMaskedError; + (rTLXFIR, bit(22)) ? omi_bus_th_1; /** TLXFIR[23] * OC unsupported group 1 */ - (rTLXFIR, bit(23)) ? defaultMaskedError; + (rTLXFIR, bit(23)) ? omi_bus_th_1; /** TLXFIR[24] * Bit flip control error */ - (rTLXFIR, bit(24)) ? defaultMaskedError; + (rTLXFIR, bit(24)) ? self_th_1; /** TLXFIR[25] * Control HW error */ - (rTLXFIR, bit(25)) ? defaultMaskedError; + (rTLXFIR, bit(25)) ? self_th_1; /** TLXFIR[26] * ECC corrected and others */ - (rTLXFIR, bit(26)) ? defaultMaskedError; + (rTLXFIR, bit(26)) ? 
self_th_32perDay; /** TLXFIR[27] * Trace stop @@ -1316,6 +1403,37 @@ group gTLXFIR }; +rule rTLX_ERR1_REPORT +{ + RECOVERABLE: + TLX_ERR1_REPORT & ~TLX_ERR1_REPORT_MASK; +}; + +group gTLX_ERR1_REPORT + filter singlebit, + cs_root_cause +{ + /** TLX_ERR1_REPORT[37] + * TLXT FIFO CE + */ + (rTLX_ERR1_REPORT, bit(37)) ? self_th_32perDay; + + /** TLX_ERR1_REPORT[39] + * Unexpected Interrupt Response + */ + (rTLX_ERR1_REPORT, bit(39)) ? parent_proc_th_32perDay; + + /** TLX_ERR1_REPORT[40] + * BDI Poisoned + */ + (rTLX_ERR1_REPORT, bit(40)) ? self_th_1; + + /** TLX_ERR1_REPORT[41] + * TLXT Metadata UE + */ + (rTLX_ERR1_REPORT, bit(41)) ? self_th_1; +}; + ################################################################################ # Explorer chip OMIDLFIR ################################################################################ @@ -1335,112 +1453,112 @@ group gOMIDLFIR cs_root_cause { /** OMIDLFIR[0] - * DL0 fatal error + * OMI-DL0 fatal error */ - (rOMIDLFIR, bit(0)) ? defaultMaskedError; + (rOMIDLFIR, bit(0)) ? dl_fatal_error; /** OMIDLFIR[1] - * Dl0 data UE + * OMI-DL0 UE on data flit */ - (rOMIDLFIR, bit(1)) ? defaultMaskedError; + (rOMIDLFIR, bit(1)) ? self_th_1; /** OMIDLFIR[2] - * Dl0 flit CE + * OMI-DL0 CE on TL flit */ - (rOMIDLFIR, bit(2)) ? defaultMaskedError; + (rOMIDLFIR, bit(2)) ? self_th_32perDay; /** OMIDLFIR[3] - * Dl0 CRC error + * OMI-DL0 detected a CRC error */ (rOMIDLFIR, bit(3)) ? defaultMaskedError; /** OMIDLFIR[4] - * DL0 nack + * OMI-DL0 received a nack */ (rOMIDLFIR, bit(4)) ? defaultMaskedError; /** OMIDLFIR[5] - * DL0 X4 mode + * OMI-DL0 running in degraded mode */ - (rOMIDLFIR, bit(5)) ? defaultMaskedError; + (rOMIDLFIR, bit(5)) ? omi_bus_th_1; /** OMIDLFIR[6] - * DL0 EDPL + * OMI-DL0 parity error detection on a lane */ (rOMIDLFIR, bit(6)) ? defaultMaskedError; /** OMIDLFIR[7] - * DL0 timeout + * OMI-DL0 retrained due to no forward progress */ - (rOMIDLFIR, bit(7)) ? defaultMaskedError; + (rOMIDLFIR, bit(7)) ? 
omi_bus_th_32perDay; /** OMIDLFIR[8] - * DL0 remote retrain + * OMI-DL0 remote side initiated a retrain */ (rOMIDLFIR, bit(8)) ? defaultMaskedError; /** OMIDLFIR[9] - * DL0 error retrain + * OMI-DL0 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(9)) ? defaultMaskedError; + (rOMIDLFIR, bit(9)) ? omi_bus_th_32perDay; /** OMIDLFIR[10] - * DL0 EDPL retrain + * OMI-DL0 threshold reached */ - (rOMIDLFIR, bit(10)) ? defaultMaskedError; + (rOMIDLFIR, bit(10)) ? omi_bus_th_32perDay; /** OMIDLFIR[11] - * DL0 trained + * OMI-DL0 trained */ (rOMIDLFIR, bit(11)) ? defaultMaskedError; /** OMIDLFIR[12] - * DL0 endpoint bit 0 + * OMI-DL0 endpoint error bit 0 */ (rOMIDLFIR, bit(12)) ? defaultMaskedError; /** OMIDLFIR[13] - * DL0 endpoint bit 1 + * OMI-DL0 endpoint error bit 1 */ (rOMIDLFIR, bit(13)) ? defaultMaskedError; /** OMIDLFIR[14] - * DL0 endpoint bit 2 + * OMI-DL0 endpoint error bit 2 */ (rOMIDLFIR, bit(14)) ? defaultMaskedError; /** OMIDLFIR[15] - * DL0 endpoint bit 3 + * OMI-DL0 endpoint error bit 3 */ (rOMIDLFIR, bit(15)) ? defaultMaskedError; /** OMIDLFIR[16] - * DL0 endpoint bit 4 + * OMI-DL0 endpoint error bit 4 */ (rOMIDLFIR, bit(16)) ? defaultMaskedError; /** OMIDLFIR[17] - * DL0 endpoint bit 5 + * OMI-DL0 endpoint error bit 5 */ (rOMIDLFIR, bit(17)) ? defaultMaskedError; /** OMIDLFIR[18] - * DL0 endpoint bit 6 + * OMI-DL0 endpoint error bit 6 */ (rOMIDLFIR, bit(18)) ? defaultMaskedError; /** OMIDLFIR[19] - * DL0 endpoint bit 7 + * OMI-DL0 endpoint error bit 7 */ (rOMIDLFIR, bit(19)) ? defaultMaskedError; /** OMIDLFIR[20:39] - * DL1 reserved + * OMI-DL1 reserved */ (rOMIDLFIR, bit(20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39)) ? defaultMaskedError; /** OMIDLFIR[40:59] - * DL2 reserved + * OMI-DL2 reserved */ (rOMIDLFIR, bit(40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)) ? defaultMaskedError; @@ -1449,6 +1567,21 @@ group gOMIDLFIR */ (rOMIDLFIR, bit(60)) ? 
defaultMaskedError; + /** OMIDLFIR[61] + * reserved + */ + (rOMIDLFIR, bit(61)) ? defaultMaskedError; + + /** OMIDLFIR[62] + * LFIR internal parity error + */ + (rOMIDLFIR, bit(62)) ? defaultMaskedError; + + /** OMIDLFIR[63] + * SCOM Satellite Error + */ + (rOMIDLFIR, bit(63)) ? defaultMaskedError; + }; ############################################################################## @@ -1463,6 +1596,5 @@ group gOMIDLFIR ############################################################################## # Include the actions defined for this target -.include "p9_common_actions.rule"; .include "explorer_ocmb_actions.rule"; diff --git a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule index 023821b0d..d5b6e3fad 100644 --- a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule +++ b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -55,22 +55,12 @@ actionclass threshold32pday threshold( field(32 / day) ); }; -################################################################################ -# Threshold and Mask policy -################################################################################ - -/** - * Threshold 32/day (field) and 1 (mnfg). Do not predictively callout on - * threshold in the field, instead just mask. 
- */ -actionclass threshold_and_mask +/** Threshold of 5 per day */ +actionclass threshold5pday { - threshold32pday; - funccall("ClearServiceCallFlag"); + threshold( field(5 / day) ); }; -actionclass threshold_and_mask_self { calloutSelfMed; threshold_and_mask; }; - ################################################################################ # Special Flags # ################################################################################ @@ -99,6 +89,50 @@ actionclass callout2ndLvlMed actionclass calloutSelfLowNoGard { callout(MRU_LOW, NO_GARD); }; +actionclass level2_M_self_L +{ + callout2ndLvlMed; + calloutSelfLow; +}; + +actionclass omi +{ + callout(connected(TYPE_OMI), MRU_MED); +}; + +actionclass omi_bus +{ + calloutSelfMedA; + callout(connected(TYPE_OMI), MRU_MEDA); + funccall("calloutBusInterfacePlugin"); +}; + +actionclass mem_port +{ + callout(connected(TYPE_MEM_PORT,0), MRU_MED); +}; + +actionclass mem_port_L +{ + callout(connected(TYPE_MEM_PORT,0), MRU_LOW); +}; + +actionclass all_dimm_H +{ + funccall("CalloutAttachedDimmsHigh"); +}; + +actionclass all_dimm_H_memport_L +{ + all_dimm_H; + mem_port_L; +}; + +actionclass parent_proc +{ + callout(connected(TYPE_PROC), MRU_MED); +}; + ################################################################################ # Callouts with thresholds # ################################################################################ @@ -109,15 +143,15 @@ actionclass self_th_1 threshold1; }; -actionclass self_th_5perHour +actionclass self_th_32perDay { calloutSelfMed; - threshold5phour; + threshold32pday; }; -actionclass self_th_32perDay +actionclass parent_proc_th_32perDay { - calloutSelfMed; + parent_proc; threshold32pday; }; @@ -127,12 +161,83 @@ actionclass level2_th_1 threshold1; }; +actionclass level2_th_32perDay +{ + callout2ndLvlMed; + threshold32pday; +}; + +actionclass level2_M_self_L_th_1 +{ + level2_M_self_L; + threshold1; +}; + +actionclass omi_th_1 +{ + omi; + threshold1; +}; + +actionclass 
omi_bus_th_1 +{ + omi_bus; + threshold1; +}; + +actionclass omi_bus_th_32perDay +{ + omi_bus; + threshold32pday; +}; + +actionclass mem_port_th_1 +{ + mem_port; + threshold1; +}; + +actionclass mem_port_th_32perDay +{ + mem_port; + threshold32pday; +}; + +################################################################################ +# Special # +################################################################################ + +/** + * Threshold 32/day (field) and 1 (mnfg). Do not predictively callout on + * threshold in the field, instead just mask. + */ +actionclass threshold_and_mask +{ + threshold32pday; + funccall("ClearServiceCallFlag"); +}; + +actionclass threshold_and_mask_self { calloutSelfMed; threshold_and_mask; }; + +actionclass threshold_and_mask_level2 +{ + level2_th_32perDay; + threshold_and_mask; +}; + +actionclass threshold_and_mask_mem_port +{ + mem_port_th_32perDay; + threshold_and_mask; +}; + ################################################################################ # Callouts with flags # ################################################################################ -actionclass self_th_1_UERE { self_th_1; SueSource; }; -actionclass level2_th_1_UERE { level2_th_1; SueSource; }; +actionclass self_th_1_UERE { self_th_1; SueSource; }; +actionclass level2_th_1_UERE { level2_th_1; SueSource; }; +actionclass mem_port_th_1_UERE { mem_port_th_1; SueSource; }; ################################################################################ # Default callouts # @@ -153,14 +258,166 @@ actionclass TBDDefaultCallout }; ################################################################################ +# OCMB Actions # +################################################################################ + +/** DDR4 PHY Interrupt */ +actionclass ddr4_phy_interrupt +{ + calloutSelfHigh; + threshold5pday; + funccall("Ddr4PhyInterrupt"); +}; + +/** Foxhound Fatal */ +actionclass foxhound_fatal +{ + funccall("FoxhoundFatal"); + threshold1; +}; + +/** OMI-DL 
Fatal Error */ +actionclass dl_fatal_error +{ + try( funccall("DlFatalError"), omi_bus ); + threshold1; +}; + +/** MCBIST program complete */ +actionclass mcbist_program_complete +{ + funccall("McbistCmdComplete"); +}; + +/** Verify Chip Mark */ +actionclass verify_chip_mark_0 { funccall("AnalyzeFetchMpe_0"); }; +actionclass verify_chip_mark_1 { funccall("AnalyzeFetchMpe_1"); }; +actionclass verify_chip_mark_2 { funccall("AnalyzeFetchMpe_2"); }; +actionclass verify_chip_mark_3 { funccall("AnalyzeFetchMpe_3"); }; +actionclass verify_chip_mark_4 { funccall("AnalyzeFetchMpe_4"); }; +actionclass verify_chip_mark_5 { funccall("AnalyzeFetchMpe_5"); }; +actionclass verify_chip_mark_6 { funccall("AnalyzeFetchMpe_6"); }; +actionclass verify_chip_mark_7 { funccall("AnalyzeFetchMpe_7"); }; + +/** Mainline NCE/TCE handling */ +actionclass mainline_nce_tce_handling +{ + funccall("AnalyzeFetchNceTce"); +}; + +/** Handle Mainline AUEs/IAUEs */ +actionclass mainline_aue_iaue_handling +{ + funccall("AnalyzeFetchAueIaue"); + mem_port_L; + threshold1; +}; + +/** Mainline UE handling */ +actionclass mainline_ue_handling +{ + threshold( field(33 / 30 min ) ); # To prevent flooding. Will be unmasked + # when background scrubbing resumes after + # targeted diagnostics is complete. + funccall("AnalyzeFetchUe"); +}; + +actionclass mainline_ue_handling_UERE +{ + SueSource; + mainline_ue_handling; +}; + +/** Handle Mainline IUEs */ +actionclass mainline_iue_handling +{ + # An IUE itself is not a SUE source, however, a threshold of IUEs will + # trigger a port failure, which will generate SUEs. The port failure could + # also crash the machine so we want to make sure this bit is flagged as an + # SUE just in case it is needed in the checkstop analysis. 
+ SueSource; + # Thresholding done in the plugin + funccall("AnalyzeMainlineIue"); +}; + +/** Handle Maintenance IUEs */ +actionclass maintenance_iue_handling +{ + # An IUE itself is not a SUE source, however, a threshold of IUEs will + # trigger a port failure, which will generate SUEs. The port failure could + # also crash the machine so we want to make sure this bit is flagged as an + # SUE just in case it is needed in the checkstop analysis. + SueSource; + # Thresholding done in the plugin + funccall("AnalyzeMaintIue"); +}; + +actionclass memory_impe_handling +{ + funccall("AnalyzeImpe"); +}; + +/** Handle Maintenance AUEs */ +actionclass maintenance_aue_handling +{ + funccall("AnalyzeMaintAue"); + mem_port_L; + threshold1; +}; + +/** Handle Maintenance IAUEs */ +actionclass maintenance_iaue_handling +{ + all_dimm_H_memport_L; + threshold1; +}; + +/** RDF RCD Parity Error */ +actionclass rdf_rcd_parity_error +{ + funccall("RdfRcdParityError"); + threshold1; +}; + +actionclass rdf_rcd_parity_error_UERE +{ + rdf_rcd_parity_error; + SueSource; +}; + +/** SRQ RCD Parity Error */ +actionclass srq_rcd_parity_error +{ + all_dimm_H_memport_L; + threshold32pday; +}; + +actionclass srq_rcd_parity_error_UERE +{ + srq_rcd_parity_error; + SueSource; +}; + +actionclass mem_port_failure +{ + all_dimm_H_memport_L; + threshold1; # Threshold 1 +}; + +################################################################################ # Analyze groups ################################################################################ -actionclass analyzeMB_LFIR { analyze(gMB_LFIR); }; +actionclass analyzeOCMB_LFIR { analyze(gOCMB_LFIR); }; actionclass analyzeMMIOFIR { analyze(gMMIOFIR); }; actionclass analyzeSRQFIR { analyze(gSRQFIR); }; actionclass analyzeMCBISTFIR { analyze(gMCBISTFIR); }; actionclass analyzeRDFFIR { analyze(gRDFFIR); }; actionclass analyzeTLXFIR { analyze(gTLXFIR); }; +actionclass analyzeTLXERR1 +{ + analyze(gTLX_ERR1_REPORT); + funccall("clearAndMaskTlxtRe"); +}; 
actionclass analyzeOMIDLFIR { analyze(gOMIDLFIR); }; diff --git a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule index a4a526124..c2205f2dd 100644 --- a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule +++ b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule @@ -223,3 +223,259 @@ capture group never; access write_only; }; + + ############################################################################ + # P9 Hardware Mark Stores + ############################################################################ + + register HW_MS0 + { + name "P9 Hardware Mark Store rank 0"; + scomaddr 0x08011C10; + capture group default; + }; + + register HW_MS1 + { + name "P9 Hardware Mark Store rank 1"; + scomaddr 0x08011C11; + capture group default; + }; + + register HW_MS2 + { + name "P9 Hardware Mark Store rank 2"; + scomaddr 0x08011C12; + capture group default; + }; + + register HW_MS3 + { + name "P9 Hardware Mark Store rank 3"; + scomaddr 0x08011C13; + capture group default; + }; + + register HW_MS4 + { + name "P9 Hardware Mark Store rank 4"; + scomaddr 0x08011C14; + capture group default; + }; + + register HW_MS5 + { + name "P9 Hardware Mark Store rank 5"; + scomaddr 0x08011C15; + capture group default; + }; + + register HW_MS6 + { + name "P9 Hardware Mark Store rank 6"; + scomaddr 0x08011C16; + capture group default; + }; + + register HW_MS7 + { + name "P9 Hardware Mark Store rank 7"; + scomaddr 0x08011C17; + capture group default; + }; + + ############################################################################ + # P9 Firmware Mark Stores + ############################################################################ + + register FW_MS0 + { + name "P9 Firmware Mark Store 0"; + scomaddr 0x08011C18; + capture group default; + }; + + register FW_MS1 + { + name "P9 Firmware Mark Store 1"; + scomaddr 0x08011C19; + capture group default; + }; + + register FW_MS2 + { + name 
"P9 Firmware Mark Store 2"; + scomaddr 0x08011C1A; + capture group default; + }; + + register FW_MS3 + { + name "P9 Firmware Mark Store 3"; + scomaddr 0x08011C1B; + capture group default; + }; + + register FW_MS4 + { + name "P9 Firmware Mark Store 4"; + scomaddr 0x08011C1C; + capture group default; + }; + + register FW_MS5 + { + name "P9 Firmware Mark Store 5"; + scomaddr 0x08011C1D; + capture group default; + }; + + register FW_MS6 + { + name "P9 Firmware Mark Store 6"; + scomaddr 0x08011C1E; + capture group default; + }; + + register FW_MS7 + { + name "P9 Firmware Mark Store 7"; + scomaddr 0x08011C1F; + capture group default; + }; + + ########################################################################### + # P9 OCMB target OMIDLFIR + ########################################################################### + + register DL0_ERROR_HOLD + { + name "P9 OCMB target DL0 Error Hold Register"; + scomaddr 0x08012813; + capture group default; + }; + + ########################################################################### + # P9 OCMB target TLXFIR + ########################################################################### + + register TLXFIR_AND + { + name "Explorer chip TLXFIR AND"; + scomaddr 0x08012401; + capture group never; + access write_only; + }; + + register TLXFIR_MASK_OR + { + name "Explorer chip TLXFIR MASK OR"; + scomaddr 0x08012405; + capture group never; + access write_only; + }; + + register TLX_ERR1_REPORT + { + name "P9 OCMB target TLX Error Report Register"; + scomaddr 0x0801241D; + reset (&, 0x0801241D); + mask (|, 0x08012415); + capture group default; + }; + + register TLX_ERR1_REPORT_MASK + { + name "P9 OCMB target TLX Error Report Register Mask"; + scomaddr 0x08012415; + capture group default; + }; + + ############################################################################ + # Explorer ECC Address Registers + ############################################################################ + + register MBNCER + { + name "Explorer 
Mainline NCE Address Trap Register"; + scomaddr 0x0801186A; + capture group default; + }; + + register MBRCER + { + name "Explorer Mainline RCE Address Trap Register"; + scomaddr 0x0801186B; + capture group default; + }; + + register MBMPER + { + name "Explorer Mainline MPE Address Trap Register"; + scomaddr 0x0801186C; + capture group default; + }; + + register MBUER + { + name "Explorer Mainline UE Address Trap Register"; + scomaddr 0x0801186D; + capture group default; + }; + + register MBAUER + { + name "Explorer Mainline AUE Address Trap Register"; + scomaddr 0x0801186E; + capture group default; + }; + + ############################################################################ + # Misc + ############################################################################ + + register FARB0 + { + name "MB_SIM.SRQ.MBA_FARB0Q"; + scomaddr 0x08011415; + capture group default; + }; + + register EXP_MSR + { + name "Explorer Mark Shadow Register"; + scomaddr 0x08011C0C; + capture group default; + }; + + register MC_ADDR_TRANS + { + name "P9 OCMB target address translation register0"; + scomaddr 0x0801186F; + capture group default; + }; + + register MC_ADDR_TRANS1 + { + name "P9 OCMB target address translation register1"; + scomaddr 0x08011870; + capture group default; + }; + + register MC_ADDR_TRANS2 + { + name "P9 OCMB target address translation register2"; + scomaddr 0x08011871; + capture group default; + }; + + ############################################################################ + # Interrupt status register + ############################################################################ + + register INTER_STATUS_REG + { + name "TPTOP.PIB.PCBMS.INTERRUPT_TYPE_REG"; + scomaddr 0x000F001A; + capture group default; + }; diff --git a/src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C b/src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C new file mode 100644 index 000000000..de385aab9 --- /dev/null +++ 
b/src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C @@ -0,0 +1,574 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfMemDbUtils.H> +#include <prdfMemEccAnalysis.H> +#include <prdfMemUtils.H> +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace explorer_ocmb +{ + +//############################################################################## +// +// Special plugins +// +//############################################################################## + +/** + * @brief Plugin that initializes the data bundle. + * @param i_chip An OCMB chip. 
+ * @return SUCCESS + */ +int32_t Initialize( ExtensibleChip * i_chip ) +{ + i_chip->getDataBundle() = new OcmbDataBundle( i_chip ); + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, Initialize ); + +/** + * @brief Plugin function called after analysis is complete but before PRD + * exits. + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. + * @note This is especially useful for any analysis that still needs to be + * done after the framework clears the FIR bits that were at attention. + * @return SUCCESS always; a triggerPortFail() failure is only traced. + */ +int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::PostAnalysis] " + + #ifdef __HOSTBOOT_RUNTIME + + // If the IUE threshold in our data bundle has been reached, we trigger + // a port fail. Once we trigger the port fail, the system may crash + // right away. Since PRD is running in the hypervisor, it is possible we + // may not get the error log. To better our chances, we trigger the port + // fail here after the error log has been committed. + if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) ) + { + if ( SUCCESS != MemEcc::triggerPortFail<TYPE_OCMB_CHIP>(i_chip) ) + { + PRDF_ERR( PRDF_FUNC "triggerPortFail(0x%08x) failed", + i_chip->getHuid() ); + } + } + + #endif // __HOSTBOOT_RUNTIME + + // Cleanup processor FIR bits on the other side of the channel. + MemUtils::cleanupChnlAttns<TYPE_OCMB_CHIP>( i_chip, io_sc ); + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, PostAnalysis ); + + +//############################################################################## +// +// OCMB_LFIR +// +//############################################################################## + +/** + * @brief OCMB_LFIR[38] - DDR4 PHY interrupt + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +int32_t Ddr4PhyInterrupt( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::Ddr4PhyInterrupt] " + + SCAN_COMM_REGISTER_CLASS * rdffir = i_chip->getRegister( "RDFFIR" ); + + // If Mainline UE (RDFFIR[14]) or Maint UE (RDFFIR[34]) are on at the same + // time as this: + if ( rdffir->IsBitSet(14) || rdffir->IsBitSet(34) ) + { + // callout Explorer on 1st occurrence + io_sc.service_data->SetThresholdMaskId(0); + + // mask maint and mainline UE which are assumed to be side-effects (a failed mask write is traced, not returned) + SCAN_COMM_REGISTER_CLASS * rdffir_mask_or = + i_chip->getRegister( "RDFFIR_MASK_OR" ); + + rdffir_mask_or->SetBit(14); + rdffir_mask_or->SetBit(34); + + if ( SUCCESS != rdffir_mask_or->Write() ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_MASK_OR: 0x%08x", + i_chip->getHuid() ); + } + } + else + { + //TODO RTC 200583 + // callout Explorer on threshold (5/day) + // NOTE: in this case we will have to clear both hw driven checkers + // manually before clearing the FIR + } + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, Ddr4PhyInterrupt ); + +//------------------------------------------------------------------------------ + +/** + * @brief OCMB_LFIR[39:46] - Foxhound Fatal + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS (placeholder: no analysis is performed yet, see TODO) + */ +int32_t FoxhoundFatal( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::FoxhoundFatal] " + + //TODO RTC 200583 + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, FoxhoundFatal ); + +//############################################################################## +// +// OMIDLFIR +// +//############################################################################## + +/** + * @brief OMIDLFIR[0] - OMI-DL0 Fatal Error + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return PRD_SCAN_COMM_REGISTER_ZERO for the bus callout, else SUCCESS + */ +int32_t DlFatalError( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::DlFatalError] " + + int32_t rc = SUCCESS; + + do + { + // Check DL0_ERROR_HOLD[52:63] to determine callout (a failed read adds no callout here and still returns SUCCESS) + SCAN_COMM_REGISTER_CLASS * dl0_error_hold = + i_chip->getRegister( "DL0_ERROR_HOLD" ); + + if ( SUCCESS != dl0_error_hold->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() Failed on DL0_ERROR_HOLD: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + + if ( dl0_error_hold->IsBitSet(53) || + dl0_error_hold->IsBitSet(55) || + dl0_error_hold->IsBitSet(57) || + dl0_error_hold->IsBitSet(58) || + dl0_error_hold->IsBitSet(59) || + dl0_error_hold->IsBitSet(60) || + dl0_error_hold->IsBitSet(62) || + dl0_error_hold->IsBitSet(63) ) + { + // callout OCMB (takes precedence over the bus callout case below) + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + } + else if ( dl0_error_hold->IsBitSet(54) || + dl0_error_hold->IsBitSet(56) || + dl0_error_hold->IsBitSet(61) ) + { + // callout the OMI target, the OMI bus, and the OCMB. + // Return PRD_SCAN_COMM_REGISTER_ZERO so the rule code knows to + // make the correct callout. + rc = PRD_SCAN_COMM_REGISTER_ZERO; + } + + }while(0); + + return rc; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, DlFatalError ); + +//############################################################################## +// +// RDFFIR +// +//############################################################################## + +/** + * @brief Adds all attached DIMMs at HIGH priority. + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +int32_t CalloutAttachedDimmsHigh( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + for ( auto & dimm : getConnected(i_chip->getTrgt(), TYPE_DIMM) ) + io_sc.service_data->SetCallout( dimm, MRU_HIGH ); + + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, CalloutAttachedDimmsHigh ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDF RCD Parity Error + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS always; a failed RDFFIR read or RDFFIR_MASK_OR write is only traced + */ +int32_t RdfRcdParityError( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::RdfRcdParityError] " + + do + { + SCAN_COMM_REGISTER_CLASS * rdffir = i_chip->getRegister( "RDFFIR" ); + if ( SUCCESS != rdffir->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() Failed on RDFFIR: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + + // If RDFFIR[40] is on at the same time, this is 'missing rddata valid' + // case, which returns SUE + if ( rdffir->IsBitSet(40) ) + { + // callout MEM_PORT on 1st occurrence + TargetHandle_t memPort = + getConnectedChild( i_chip->getTrgt(), TYPE_MEM_PORT, 0 ); + io_sc.service_data->SetCallout( memPort ); + } + // Else this is 'confirmed RCD parity error' case + else + { + // callout DIMM high priority, MEM_PORT low on 1st occurrence + CalloutAttachedDimmsHigh( i_chip, io_sc ); + TargetHandle_t memPort = + getConnectedChild( i_chip->getTrgt(), TYPE_MEM_PORT, 0 ); + io_sc.service_data->SetCallout( memPort, MRU_LOW ); + } + + // Mask bit 40 as well (done for both cases above) + SCAN_COMM_REGISTER_CLASS * rdffir_mask_or = + i_chip->getRegister( "RDFFIR_MASK_OR" ); + + rdffir_mask_or->SetBit(40); + if ( SUCCESS != rdffir_mask_or->Write() ) + { + PRDF_ERR( PRDF_FUNC "Write() Failed on RDFFIR_MASK_OR: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + + }while(0); + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, 
RdfRcdParityError ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[0:7] - Mainline MPE (one AnalyzeFetchMpe_<RANK> plugin is generated per RANK 0-7 by the macro below). + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +#define PLUGIN_FETCH_MPE_ERROR( RANK ) \ +int32_t AnalyzeFetchMpe_##RANK( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + MemRank rank ( RANK ); \ + MemEcc::analyzeFetchMpe<TYPE_OCMB_CHIP>( i_chip, rank, io_sc ); \ + return SUCCESS; \ +} \ +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchMpe_##RANK ); + +PLUGIN_FETCH_MPE_ERROR( 0 ) +PLUGIN_FETCH_MPE_ERROR( 1 ) +PLUGIN_FETCH_MPE_ERROR( 2 ) +PLUGIN_FETCH_MPE_ERROR( 3 ) +PLUGIN_FETCH_MPE_ERROR( 4 ) +PLUGIN_FETCH_MPE_ERROR( 5 ) +PLUGIN_FETCH_MPE_ERROR( 6 ) +PLUGIN_FETCH_MPE_ERROR( 7 ) + +#undef PLUGIN_FETCH_MPE_ERROR + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[8:9] - Mainline NCE and/or TCE. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeFetchNceTce( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + MemEcc::analyzeFetchNceTce<TYPE_OCMB_CHIP>( i_chip, io_sc ); + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchNceTce ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[14] - Mainline UE. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeFetchUe( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + MemEcc::analyzeFetchUe<TYPE_OCMB_CHIP>( i_chip, io_sc ); + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchUe ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[17] - Mainline read IUE. + * @param i_chip OCMB chip. 
+ * @param io_sc The step code data struct. + * @return PRD_NO_CLEAR_FIR_BITS if IUE threshold is reached, else SUCCESS. + */ +int32_t AnalyzeMainlineIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + int32_t rc = SUCCESS; + MemEcc::analyzeMainlineIue<TYPE_OCMB_CHIP>( i_chip, io_sc ); + + #ifdef __HOSTBOOT_MODULE + + if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) ) + rc = PRD_NO_CLEAR_FIR_BITS; + + #endif + + return rc; // PRD_NO_CLEAR_FIR_BITS at IUE threshold, else SUCCESS +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMainlineIue ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[37] - Maint IUE. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return PRD_NO_CLEAR_FIR_BITS if IUE threshold is reached (checked only in __HOSTBOOT_MODULE builds), else SUCCESS. + */ +int32_t AnalyzeMaintIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + int32_t rc = SUCCESS; + MemEcc::analyzeMaintIue<TYPE_OCMB_CHIP>( i_chip, io_sc ); + + #ifdef __HOSTBOOT_MODULE + + if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) ) + rc = PRD_NO_CLEAR_FIR_BITS; + + #endif + + return rc; // PRD_NO_CLEAR_FIR_BITS at IUE threshold, else SUCCESS +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMaintIue ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[19,39] - Mainline and Maint IMPE + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + MemEcc::analyzeImpe<TYPE_OCMB_CHIP>( i_chip, io_sc ); + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeImpe ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[13,16] - Mainline AUE and IAUE + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +int32_t AnalyzeFetchAueIaue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::AnalyzeFetchAueIaue] " + + MemAddr addr; + if ( SUCCESS != getMemReadAddr<TYPE_OCMB_CHIP>(i_chip, + MemAddr::READ_AUE_ADDR, + addr) ) + { + PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x,READ_AUE_ADDR) failed", + i_chip->getHuid() ); + } + else + { + MemRank rank = addr.getRank(); + MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm, MRU_HIGH ); + } + + return SUCCESS; // nothing to return to rule code, even if the address read failed + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchAueIaue ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[33] - Maintenance AUE. Calls out the rank of the maintenance address at HIGH priority. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS always; if getMemMaintAddr() fails the error is traced and no callout is added + */ +int32_t AnalyzeMaintAue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::AnalyzeMaintAue] " + + MemAddr addr; + if ( SUCCESS != getMemMaintAddr<TYPE_OCMB_CHIP>(i_chip, addr) ) + { + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + i_chip->getHuid() ); + } + else + { + MemRank rank = addr.getRank(); + MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm, MRU_HIGH ); + } + + return SUCCESS; // nothing to return to rule code + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMaintAue ); + + +//############################################################################## +// +// TLXFIR +// +//############################################################################## + +/** + * @brief Clears TLXFIR[9]; also masks it when the error is at threshold. + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +int32_t clearAndMaskTlxtRe( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::clearAndMaskTlxtRe] " + + do + { + // If we are at threshold, mask TLXFIR[9]. If the mask write fails, the clear below is skipped. + if ( io_sc.service_data->IsAtThreshold() ) + { + SCAN_COMM_REGISTER_CLASS * tlxfir_mask_or = + i_chip->getRegister( "TLXFIR_MASK_OR" ); + + tlxfir_mask_or->SetBit(9); + if ( SUCCESS != tlxfir_mask_or->Write() ) + { + PRDF_ERR( PRDF_FUNC "Write() Failed on TLXFIR_MASK_OR: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + } + + // Clear TLXFIR[9]: write all ones except bit 9 to the TLXFIR AND register + SCAN_COMM_REGISTER_CLASS * tlxfir_and = + i_chip->getRegister( "TLXFIR_AND" ); + tlxfir_and->setAllBits(); + + tlxfir_and->ClearBit(9); + if ( SUCCESS != tlxfir_and->Write() ) + { + PRDF_ERR( PRDF_FUNC "Write() Failed on TLXFIR_AND: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + }while(0); + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, clearAndMaskTlxtRe ); + +} // end namespace explorer_ocmb + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk b/src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk new file mode 100644 index 000000000..b79d5cc30 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk @@ -0,0 +1,39 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# IBM_PROLOG_END_TAG + +################################################################################ +# Paths common to both FSP and Hostboot +################################################################################ + +prd_vpath += ${PRD_SRC_PATH}/common/plat/explorer + +prd_incpath += ${PRD_SRC_PATH}/common/plat/explorer + +################################################################################ +# Object files common to both FSP and Hostboot +################################################################################ + +# rule plugin related +prd_rule_plugin += prdfExplorerPlugins_common.o diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C index 1227afeb8..654b39ba0 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -46,8 +46,8 @@ using namespace PlatServices; // Class MemAddr //------------------------------------------------------------------------------ -template<> -MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ) +template<TARGETING::TYPE T> +MemAddr MemAddr::fromReadAddr( uint64_t i_addr ) { uint64_t mrnk = (i_addr >> 59) & 0x7; // 2: 4 uint64_t srnk = (i_addr >> 56) & 0x7; // 5: 7 @@ -58,6 +58,12 @@ MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } +template +MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ); +template +MemAddr MemAddr::fromReadAddr<TYPE_OCMB_CHIP>( uint64_t i_addr ); + + template<> MemAddr MemAddr::fromReadAddr<TYPE_MEMBUF>( uint64_t i_addr ) { @@ -73,8 +79,8 @@ MemAddr MemAddr::fromReadAddr<TYPE_MEMBUF>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } -template<> -MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ) +template<TARGETING::TYPE T> +MemAddr MemAddr::fromMaintAddr( uint64_t i_addr ) { uint64_t rslct = (i_addr >> 59) & 0x3; // 3: 4 uint64_t srnk = (i_addr >> 56) & 0x7; // 5: 7 @@ -88,6 +94,12 @@ MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } +template +MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ); +template +MemAddr MemAddr::fromMaintAddr<TYPE_OCMB_CHIP>( uint64_t i_addr ); + + template<> MemAddr MemAddr::fromMaintAddr<TYPE_MBA>( uint64_t i_addr ) { @@ -169,6 +181,53 @@ uint32_t getMemReadAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, uint32_t i_pos, //------------------------------------------------------------------------------ template<> +uint32_t getMemReadAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + MemAddr::ReadReg i_reg, + MemAddr & o_addr ) +{ + #define PRDF_FUNC "[getMemReadAddr<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + // Check parameters + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( 
TYPE_OCMB_CHIP == i_chip->getType() ); + + // Get the register string. + const char * reg_str = ""; + switch ( i_reg ) + { + case MemAddr::READ_NCE_ADDR: reg_str = "MBNCER"; break; + case MemAddr::READ_RCE_ADDR: reg_str = "MBRCER"; break; + case MemAddr::READ_MPE_ADDR: reg_str = "MBMPER"; break; + case MemAddr::READ_UE_ADDR : reg_str = "MBUER" ; break; + case MemAddr::READ_AUE_ADDR: reg_str = "MBAUER"; break; + default: PRDF_ASSERT( false ); + } + + // Read the address register + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( reg_str ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on %s: i_chip=0x%08x", + reg_str, i_chip->getHuid() ); + } + else + { + // Get the address object. + uint64_t addr = reg->GetBitFieldJustified( 0, 64 ); + o_addr = MemAddr::fromReadAddr<TYPE_OCMB_CHIP>( addr ); + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> uint32_t getMemReadAddr<TYPE_MEMBUF>( ExtensibleChip * i_chip, uint32_t i_pos, MemAddr::ReadReg i_reg, MemAddr & o_addr ) { @@ -247,15 +306,14 @@ uint32_t getMemReadAddr<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ -template<> -uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, - MemAddr & o_addr ) +template<TARGETING::TYPE T> +uint32_t getMemMaintAddr( ExtensibleChip * i_chip, MemAddr & o_addr ) { - #define PRDF_FUNC "[getMemMaintAddr<TYPE_MCBIST>] " + #define PRDF_FUNC "[getMemMaintAddr<T>] " // Check parameters PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCBIST == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); // Read the address register SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MCBMCAT" ); @@ -269,7 +327,7 @@ uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, { // Get the address object. 
uint64_t addr = reg->GetBitFieldJustified( 0, 64 ); - o_addr = MemAddr::fromMaintAddr<TYPE_MCBIST>( addr ); + o_addr = MemAddr::fromMaintAddr<T>( addr ); } return o_rc; @@ -277,6 +335,13 @@ uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, + MemAddr & o_addr ); +template +uint32_t getMemMaintAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + MemAddr & o_addr ); + //------------------------------------------------------------------------------ template<> @@ -389,8 +454,9 @@ uint32_t getMemMaintEndAddr<TYPE_MBA>( ExtensibleChip * i_chip, #ifdef __HOSTBOOT_MODULE -uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, - std::vector<ExtensibleChip *> & o_mcaList ) +template<> +uint32_t getMcbistMaintPort<TYPE_MCBIST>( ExtensibleChip * i_mcbChip, + ExtensibleChipList & o_mcaList ) { #define PRDF_FUNC "[getMcbistMaintPort] " @@ -402,9 +468,9 @@ uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, o_mcaList.clear(); - SCAN_COMM_REGISTER_CLASS * mcbagra = i_mcbChip->getRegister( "MCBAGRA" ); - SCAN_COMM_REGISTER_CLASS * mcbmcat = i_mcbChip->getRegister( "MCBMCAT" ); - SCAN_COMM_REGISTER_CLASS * mcb_cntl = i_mcbChip->getRegister( "MCB_CNTL" ); + SCAN_COMM_REGISTER_CLASS * mcbagra = i_mcbChip->getRegister( "MCBAGRA" ); + SCAN_COMM_REGISTER_CLASS * mcbmcat = i_mcbChip->getRegister( "MCBMCAT" ); + SCAN_COMM_REGISTER_CLASS * mcb_cntl = i_mcbChip->getRegister( "MCB_CNTL" ); do { @@ -446,7 +512,7 @@ uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, } // Get MCAs from all targeted ports. 
- for ( uint8_t p = 0; p < 4; p++ ) + for ( uint8_t p = 0; p < MAX_MCA_PER_MCBIST; p++ ) { if ( 0 == (portMask & (0x8 >> p)) ) continue; diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H index 8dc192672..f5120b3b5 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -167,7 +167,7 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, uint32_t i_pos, /** * @brief Reads the specified mainline memory read address from hardware. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_reg The target address register. * @param o_addr The returned address from hardware. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. @@ -189,7 +189,7 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, MemAddr::ReadReg i_reg, * mode or not. Therefore, users must call getMcbistMaintPort() to get the port * information. * - * @param i_chip An MBA or MCBIST chip. + * @param i_chip An MBA, MCBIST, or OCMB chip. * @param o_addr The returned address from hardware. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -228,11 +228,12 @@ uint32_t getMemMaintEndAddr( ExtensibleChip * i_chip, MemAddr & o_addr ); * * @note Only supported for MCBIST. * @param i_mcbChip An MCBIST chip. - * @param o_mcaList A list of all MCAs targeted by the command. + * @param o_portList A list of all MCAs targeted by the command. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
*/ +template<TARGETING::TYPE T> uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, - std::vector<ExtensibleChip *> & o_mcaList ); + ExtensibleChipList & o_portList ); #endif diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C b/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C index ebef7ae29..4d55c7c50 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C @@ -39,6 +39,7 @@ #include <prdfCenMbaDataBundle.H> #include <prdfPlatServices.H> #include <prdfP9McaDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemRowRepair.H> @@ -65,8 +66,16 @@ void addExtMemMruData( const MemoryMru & i_memMru, errlHndl_t io_errl ) { TargetHandle_t trgt = i_memMru.getTrgt(); - // Get the DRAM width. - extMemMru.isX4Dram = isDramWidthX4( trgt ) ? 1 : 0; + if ( TYPE_OCMB_CHIP == getTargetType(trgt) ) + { + TargetHandle_t dimm = getConnectedDimm( trgt, i_memMru.getRank() ); + extMemMru.isX4Dram = isDramWidthX4( dimm ) ? 1 : 0; + } + else + { + // Get the DRAM width. + extMemMru.isX4Dram = isDramWidthX4( trgt ) ? 1 : 0; + } // Get the DIMM type. 
if ( TYPE_MBA == getTargetType(trgt) ) @@ -97,9 +106,9 @@ void addExtMemMruData( const MemoryMru & i_memMru, errlHndl_t io_errl ) { getDimmDqAttr<TYPE_DIMM>(partList[0], extMemMru.dqMapping); } - else if ( TYPE_MEM_PORT == getTargetType(trgt) ) + else if ( TYPE_OCMB_CHIP == getTargetType(trgt) ) { - getDimmDqAttr<TYPE_MEM_PORT>( trgt, extMemMru.dqMapping ); + getDimmDqAttr<TYPE_OCMB_CHIP>( trgt, extMemMru.dqMapping ); } else { @@ -172,7 +181,6 @@ void captureDramRepairsData( TARGETING::TargetHandle_t i_trgt, if( CEN_VPD_DIMM_SPARE_NO_SPARE != spareConfig ) data.header.isSpareDram = true; - // Iterate all ranks to get DRAM repair data for ( auto & rank : masterRanks ) { @@ -220,8 +228,11 @@ void captureDramRepairsData( TARGETING::TargetHandle_t i_trgt, if ( data.rankDataList.size() > 0 ) { data.header.rankCount = data.rankDataList.size(); - data.header.isEccSp = ( isDramWidthX4( i_trgt ) && - (TYPE_MBA == getTargetType(i_trgt)) ); + data.header.isEccSp = false; + if ( TYPE_MBA == getTargetType(i_trgt) ) + { + data.header.isEccSp = isDramWidthX4( i_trgt ); + } UtilMem dramStream; dramStream << data; @@ -459,6 +470,33 @@ void captureIueCounts<McaDataBundle*>( TARGETING::TargetHandle_t i_trgt, //------------------------------------------------------------------------------ template<> +void captureIueCounts<OcmbDataBundle*>( TARGETING::TargetHandle_t i_trgt, + OcmbDataBundle * i_db, + CaptureData & io_cd ) +{ + #ifdef __HOSTBOOT_MODULE + + uint8_t sz_capData = i_db->iv_iueTh.size()*2; + uint8_t capData[sz_capData] = {}; + uint8_t idx = 0; + + for ( auto & th_pair : i_db->iv_iueTh ) + { + capData[idx] = th_pair.first; + capData[idx+1] = th_pair.second.getCount(); + idx += 2; + } + + // Add data to capture data. 
+ BitString bs ( sz_capData*8, (CPU_WORD *) &capData ); + io_cd.Add( i_trgt, Util::hashString("IUE_COUNTS"), bs ); + + #endif +} + +//------------------------------------------------------------------------------ + +template<> void addEccData<TYPE_MCA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -497,6 +535,33 @@ void addEccData<TYPE_MCBIST>( ExtensibleChip * i_chip, } template<> +void addEccData<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + CaptureData & cd = io_sc.service_data->GetCaptureData(); + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + TargetHandle_t ocmbTrgt = i_chip->getTrgt(); + + // Add DRAM repairs data from hardware. + captureDramRepairsData<TYPE_OCMB_CHIP>( ocmbTrgt, cd ); + + // Add DRAM repairs data from VPD. + captureDramRepairsVpd<TYPE_OCMB_CHIP>( ocmbTrgt, cd ); + + // Add IUE counts to capture data. + captureIueCounts<OcmbDataBundle*>( ocmbTrgt, db, cd ); + + // Add CE table to capture data. + db->iv_ceTable.addCapData( cd ); + + // Add UE table to capture data. + db->iv_ueTable.addCapData( cd ); +} + +template<> void addEccData<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -558,6 +623,22 @@ void addEccData<TYPE_MBA>( TargetHandle_t i_trgt, errlHndl_t io_errl ) ErrDataService::AddCapData( cd, io_errl ); } +template<> +void addEccData<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + errlHndl_t io_errl ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_trgt) ); + + CaptureData cd; + + // Add DRAM repairs data from hardware. + captureDramRepairsData<TYPE_OCMB_CHIP>( i_trgt, cd ); + + // Add DRAM repairs data from VPD. 
+ captureDramRepairsVpd<TYPE_OCMB_CHIP>( i_trgt, cd ); + + ErrDataService::AddCapData( cd, io_errl ); +} //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C b/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C index 16645586b..799e32e67 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C @@ -281,7 +281,7 @@ void MemCeTable<T>::addCapData( CaptureData & io_cd ) // Avoid linker errors with the template. template class MemCeTable<TYPE_MCA>; template class MemCeTable<TYPE_MBA>; -template class MemCeTable<TYPE_MEM_PORT>; +template class MemCeTable<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H index 7605a82fa..80586976e 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -28,6 +28,8 @@ #include <prdfCenMbaDataBundle.H> #include <prdfP9McaDataBundle.H> +#include <prdfOcmbDataBundle.H> +#include <prdfTargetServices.H> namespace PRDF { @@ -62,6 +64,16 @@ uint32_t addCeTableEntry<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +uint32_t addCeTableEntry<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + const MemSymbol & i_symbol, + bool i_isHard ) +{ + return getOcmbDataBundle(i_chip)->iv_ceTable.addEntry( i_addr, i_symbol, + i_isHard ); +} + +template<> inline uint32_t addCeTableEntry<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, const MemSymbol & i_symbol, @@ -91,6 +103,14 @@ void addUeTableEntry<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +void addUeTableEntry<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + UE_TABLE::Type i_type, + const MemAddr & i_addr ) +{ + getOcmbDataBundle(i_chip)->iv_ueTable.addEntry( i_type, i_addr ); +} + +template<> inline void addUeTableEntry<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, UE_TABLE::Type i_type, const MemAddr & i_addr ) @@ -118,6 +138,14 @@ void resetEccFfdc<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +void resetEccFfdc<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + AddrRangeType i_type ) +{ + getOcmbDataBundle(i_chip)->iv_ceTable.deactivateRank( i_rank, i_type ); +} + +template<> inline void resetEccFfdc<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, AddrRangeType i_type ) @@ -134,7 +162,7 @@ void resetEccFfdc<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, /** * @brief Generic wrapper to push a TdEntry to the Targeted Diagnostics queue. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_entry The new TdEntry. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
*/ @@ -155,6 +183,13 @@ void pushToQueue<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, getMbaDataBundle(i_chip)->getTdCtlr()->pushToQueue( i_entry ); } +template<> inline +void pushToQueue<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + TdEntry * i_entry ) +{ + getOcmbDataBundle(i_chip)->getTdCtlr()->pushToQueue( i_entry ); +} + #endif // Hostboot IPL/Runtime //############################################################################## @@ -179,6 +214,13 @@ MemIplCeStats<TARGETING::TYPE_MCA> * getIplCeStats( ExtensibleChip * i_chip ) } template<> inline +MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * getIplCeStats( + ExtensibleChip * i_chip ) +{ + return getOcmbDataBundle(i_chip)->getIplCeStats(); +} + +template<> inline MemIplCeStats<TARGETING::TYPE_MBA> * getIplCeStats( ExtensibleChip * i_chip ) { return getMbaDataBundle(i_chip)->getIplCeStats(); @@ -211,6 +253,13 @@ uint32_t handleTdEvent<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +uint32_t handleTdEvent<TARGETING::TYPE_OCMB_CHIP>(ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc) +{ + return getOcmbDataBundle(i_chip)->getTdCtlr()->handleTdEvent( io_sc ); +} + +template<> inline uint32_t handleTdEvent<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -242,6 +291,16 @@ void banTps<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, getMbaDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank ); } +template<> inline +void banTps<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ) +{ + // Ban TPS on this rank. + getOcmbDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank ); + // Permanently mask mainline NCEs and TCEs because of the TPS ban. 
+ getOcmbDataBundle(i_chip)->iv_maskMainlineNceTce = true; +} + #endif // Hostboot Runtime only } // end namespace MemDbUtils diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C index 308e25dab..5db522818 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C @@ -450,6 +450,9 @@ std::vector<MemSymbol> MemDqBitmap::getSymbolList( uint8_t i_portSlct ) case TYPE_MEM_PORT: symbol = dq2Symbol<TYPE_MEM_PORT>( dq, i_portSlct ); break; + case TYPE_OCMB_CHIP: + symbol = dq2Symbol<TYPE_OCMB_CHIP>(dq, i_portSlct); + break; default: PRDF_ERR( "Invalid trgt type" ); PRDF_ASSERT( false ); @@ -700,7 +703,7 @@ uint32_t MemDqBitmap::setEccSpare( uint8_t i_pins ) // Utility Functions //############################################################################## -uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t setDramInVpd( TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ) { #define PRDF_FUNC "[MemDqBitmap::__setDramInVpd] " @@ -709,14 +712,12 @@ uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, do { - TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - MemDqBitmap dqBitmap; - o_rc = getBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = getBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } @@ -727,11 +728,11 @@ uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, break; } - o_rc = setBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } }while(0); @@ -743,7 +744,7 @@ uint32_t setDramInVpd( 
ExtensibleChip * i_chip, const MemRank & i_rank, //------------------------------------------------------------------------------ -uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t clearDramInVpd( TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ) { #define PRDF_FUNC "[MemDqBitmap::__clearDramInVpd] " @@ -752,14 +753,12 @@ uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, do { - TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - MemDqBitmap dqBitmap; - o_rc = getBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = getBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } @@ -770,11 +769,11 @@ uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, break; } - o_rc = setBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } }while(0); diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H index b407d9835..c3648dbc5 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H @@ -73,7 +73,22 @@ class MemDqBitmap /** @brief Constructor from components */ MemDqBitmap( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, BitmapData i_d ) : iv_trgt(i_trgt), iv_rank(i_rank), - iv_x4Dram(PlatServices::isDramWidthX4(i_trgt)), iv_data(i_d){} + iv_x4Dram(true), iv_data(i_d) + { + if ( TARGETING::TYPE_MEM_PORT == PlatServices::getTargetType(iv_trgt) || + TARGETING::TYPE_OCMB_CHIP == + PlatServices::getTargetType(iv_trgt) ) + { + // TODO RTC 210072 - Support multiple ports + TARGETING::TargetHandle_t dimm 
= + PlatServices::getConnectedDimm( iv_trgt, iv_rank ); + iv_x4Dram = PlatServices::isDramWidthX4( dimm ); + } + else + { + iv_x4Dram = PlatServices::isDramWidthX4( iv_trgt ); + } + } public: // functions @@ -224,7 +239,7 @@ class MemDqBitmap private: // instance variables - TARGETING::TargetHandle_t iv_trgt; ///< Target MBA/MCA/MEM_PORT + TARGETING::TargetHandle_t iv_trgt; ///< Target MBA/MCA/MEM_PORT/OCMB_CHIP MemRank iv_rank; ///< Target rank bool iv_x4Dram; ///< TRUE if iv_trgt uses x4 DRAMs @@ -238,20 +253,21 @@ class MemDqBitmap /** * @brief Sets the inputted dram in DRAM repairs VPD. - * @param i_chip MBA or MCA chip. + * @param i_trgt MBA, MCA, MEM_PORT, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ -uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t setDramInVpd( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ); /** * @brief Clears the inputted dram in DRAM repairs VPD. - * @param i_chip MBA or MCA chip. + * @param i_trgt MBA, MCA, MEM_PORT, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. 
*/ -uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t clearDramInVpd( TARGETING::TargetHandle_t i_trgt, + const MemRank & i_rank, MemSymbol i_symbol ); } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C index 9869a8c08..f206a074e 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C @@ -127,6 +127,87 @@ uint32_t handleMemUe<TYPE_MCA>( ExtensibleChip * i_chip, const MemAddr & i_addr, i_chip->getHuid(), i_type ); break; } + + #ifdef __HOSTBOOT_RUNTIME + // Increment the UE counter and store the rank we're on, resetting + // the UE and CE counts if we have stopped on a new rank. + ExtensibleChip * mcb = getConnectedParent( i_chip, TYPE_MCBIST ); + McbistDataBundle * mcbdb = getMcbistDataBundle(mcb); + if ( mcbdb->iv_ceUeRank != i_addr.getRank() ) + { + mcbdb->iv_ceStopCounter.reset(); + mcbdb->iv_ueStopCounter.reset(); + } + mcbdb->iv_ueStopCounter.inc( io_sc ); + mcbdb->iv_ceUeRank = i_addr.getRank(); + #endif + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> +uint32_t handleMemUe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + UE_TABLE::Type i_type, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemEcc::handleMemUe<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // First check to see if this is a side-effect UE. + SCAN_COMM_REGISTER_CLASS * fir = i_chip->getRegister("OCMB_LFIR"); + o_rc = fir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on OCMB_LFIR: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + // Check OCMB_LFIR[38] to determine if this is a side-effect. + if ( fir->IsBitSet(38) ) + { + // This is a side-effect. Callout the OCMB. 
+ PRDF_TRAC( PRDF_FUNC "Memory UE is side-effect of DDRPHY error" ); + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + io_sc.service_data->setServiceCall(); + } + else + { + // Handle the memory UE. + o_rc = __handleMemUe<TYPE_OCMB_CHIP>( i_chip, i_addr, i_type, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__handleMemUe(0x%08x,%d) failed", + i_chip->getHuid(), i_type ); + break; + } + + #ifdef __HOSTBOOT_RUNTIME + // Increment the UE counter and store the rank we're on, resetting + // the UE and CE counts if we have stopped on a new rank. + OcmbDataBundle * ocmbdb = getOcmbDataBundle(i_chip); + if ( ocmbdb->iv_ceUeRank != i_addr.getRank() ) + { + ocmbdb->iv_ceStopCounter.reset(); + ocmbdb->iv_ueStopCounter.reset(); + } + ocmbdb->iv_ueStopCounter.inc( io_sc ); + ocmbdb->iv_ceUeRank = i_addr.getRank(); + #endif + } } while (0); @@ -328,6 +409,52 @@ uint32_t maskMemPort<TYPE_MCA>( ExtensibleChip * i_chip ) #undef PRDF_FUNC } +template<> +uint32_t maskMemPort<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +{ + #define PRDF_FUNC "[MemEcc::maskMemPort<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // Mask all FIRs on the OCMB in the chiplet FIRs. + SCAN_COMM_REGISTER_CLASS * chipletMask = + i_chip->getRegister("OCMB_CHIPLET_FIR_MASK"); + SCAN_COMM_REGISTER_CLASS * chipletSpaMask = + i_chip->getRegister("OCMB_CHIPLET_SPA_FIR_MASK"); + + chipletMask->setAllBits(); + chipletSpaMask->setAllBits(); + + o_rc = chipletMask->Write() | chipletSpaMask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on 0x%08x", i_chip->getHuid() ); + break; + } + + #ifdef __HOSTBOOT_RUNTIME + + // Dynamically deallocate the port. 
+ if ( SUCCESS != MemDealloc::port<TYPE_OCMB_CHIP>( i_chip ) ) + { + PRDF_ERR( PRDF_FUNC "MemDealloc::port<TYPE_OCMB_CHIP>(0x%08x) " + "failed", i_chip->getHuid() ); + } + + #endif + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + #endif // __HOSTBOOT_MODULE //------------------------------------------------------------------------------ @@ -390,6 +517,62 @@ uint32_t triggerPortFail<TYPE_MCA>( ExtensibleChip * i_chip ) #undef PRDF_FUNC } +template<> +uint32_t triggerPortFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +{ + #define PRDF_FUNC "[MemEcc::triggerPortFail<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + do + { + // trigger a port fail + // set FARB0[59] - MBA_FARB0Q_CFG_INJECT_PARITY_ERR_CONSTANT and + // FARB0[40] - MBA_FARB0Q_CFG_INJECT_PARITY_ERR_ADDR5 + SCAN_COMM_REGISTER_CLASS * farb0 = i_chip->getRegister("FARB0"); + + o_rc = farb0->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() FARB0 failed: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + farb0->SetBit(59); + farb0->SetBit(40); + + o_rc = farb0->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() FARB0 failed: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + // reset thresholds to prevent issuing multiple port failures on + // the same port + for ( auto & resetTh : db->iv_iueTh ) + { + resetTh.second.reset(); + } + + db->iv_iuePortFail = true; + + break; + }while(0); + + + return o_rc; + + #undef PRDF_FUNC +} + #endif // __HOSTBOOT_RUNTIME //------------------------------------------------------------------------------ @@ -420,6 +603,30 @@ bool queryIueTh<TYPE_MCA>( ExtensibleChip * i_chip, return iueAtTh; } +template<> +bool queryIueTh<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == 
i_chip->getType() ); + + bool iueAtTh = false; + + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + // Loop through all our thresholds + for ( auto & th : db->iv_iueTh ) + { + // If threshold reached + if ( th.second.thReached(io_sc) ) + { + iueAtTh = true; + } + } + + return iueAtTh; +} + #endif //------------------------------------------------------------------------------ @@ -493,6 +700,11 @@ template uint32_t handleMpe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, UE_TABLE::Type i_type, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t handleMpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + UE_TABLE::Type i_type, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -581,6 +793,10 @@ template uint32_t analyzeFetchMpe<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchMpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -794,6 +1010,9 @@ uint32_t analyzeFetchNceTce<TYPE_MCA>( ExtensibleChip * i_chip, template uint32_t analyzeFetchNceTce<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchNceTce<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -871,6 +1090,9 @@ uint32_t analyzeFetchUe<TYPE_MCA>( ExtensibleChip * i_chip, template uint32_t analyzeFetchUe<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchUe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -955,16 +1177,97 @@ uint32_t handleMemIue<TYPE_MCA>( ExtensibleChip * 
i_chip, #undef PRDF_FUNC } -//------------------------------------------------------------------------------ - template<> -uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, +uint32_t handleMemIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ) { + #define PRDF_FUNC "[MemEcc::handleMemIue] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + // Add the DIMM to the callout list. + MemoryMru mm { i_chip->getTrgt(), i_rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm ); + + #ifdef __HOSTBOOT_MODULE + + do + { + // Nothing else to do if handling a system checkstop. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) break; + + // Get the data bundle from chip. + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + // If we have already caused a port fail, mask the IUE bits. + if ( true == db->iv_iuePortFail ) + { + SCAN_COMM_REGISTER_CLASS * mask_or = + i_chip->getRegister("RDFFIR_MASK_OR"); + + mask_or->SetBit(17); + mask_or->SetBit(37); + + o_rc = mask_or->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on 0x%08x", + i_chip->getHuid() ); + break; + } + } + + // Get the DIMM select. + uint8_t ds = i_rank.getDimmSlct(); + + // Initialize threshold if it doesn't exist yet. + if ( 0 == db->iv_iueTh.count(ds) ) + { + db->iv_iueTh[ds] = TimeBasedThreshold( getIueTh() ); + } + + // Increment the count and check if at threshold. + if ( db->iv_iueTh[ds].inc(io_sc) ) + { + // Make the error log predictive. + io_sc.service_data->setServiceCall(); + + // The port fail will be triggered in the PostAnalysis plugin after + // the error log has been committed. + + // Mask off the entire port to avoid collateral. 
+ o_rc = MemEcc::maskMemPort<TYPE_OCMB_CHIP>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "MemEcc::maskMemPort(0x%08x) failed", + i_chip->getHuid() ); + break; + } + } + + } while (0); + + #endif // __HOSTBOOT_MODULE + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +uint32_t analyzeMainlineIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ #define PRDF_FUNC "[MemEcc::analyzeMainlineIue] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -974,7 +1277,7 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, // not likely that we will have two independent failure modes at the // same time. So we just assume the address is correct. MemAddr addr; - o_rc = getMemReadAddr<TYPE_MCA>( i_chip, MemAddr::READ_RCE_ADDR, addr ); + o_rc = getMemReadAddr<T>( i_chip, MemAddr::READ_RCE_ADDR, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x, READ_RCE_ADDR) failed", @@ -983,7 +1286,7 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, } MemRank rank = addr.getRank(); - o_rc = handleMemIue<TYPE_MCA>( i_chip, rank, io_sc ); + o_rc = handleMemIue<T>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,m%ds%d) failed", @@ -998,16 +1301,23 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeMainlineIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ -template<> -uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & io_sc ) +template<TARGETING::TYPE T> 
+uint32_t analyzeMaintIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemEcc::analyzeMaintIue] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -1015,7 +1325,7 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, { // Use the current address in the MCBMCAT. MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MCA>( i_chip, addr ); + o_rc = getMemMaintAddr<T>( i_chip, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -1024,7 +1334,7 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, } MemRank rank = addr.getRank(); - o_rc = handleMemIue<TYPE_MCA>( i_chip, rank, io_sc ); + o_rc = handleMemIue<T>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,m%ds%d) failed", @@ -1039,6 +1349,13 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeMaintIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ template<> @@ -1152,6 +1469,117 @@ uint32_t analyzeImpe<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t analyzeImpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + + #define PRDF_FUNC "[MemEcc::analyzeImpe] " + + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // get the mark shadow register + SCAN_COMM_REGISTER_CLASS * msr = i_chip->getRegister("EXP_MSR"); + + o_rc = msr->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on EXP_MSR: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + TargetHandle_t trgt = i_chip->getTrgt(); + + // get galois 
field code - bits 8:15 of MSR + uint8_t galois = msr->GetBitFieldJustified( 8, 8 ); + + // get rank - bits 16:18 of MSR + uint8_t mrnk = msr->GetBitFieldJustified( 16, 3 ); + MemRank rank( mrnk ); + + // get symbol and DRAM + MemSymbol symbol = MemSymbol::fromGalois( trgt, rank, galois ); + if ( !symbol.isValid() ) + { + PRDF_ERR( PRDF_FUNC "Galois 0x%02x from EXP_MSR is invalid: 0x%08x," + "0x%02x", galois, i_chip->getHuid(), rank.getKey() ); + o_rc = FAIL; + break; + } + + // Add the DIMM to the callout list + MemoryMru memmru( trgt, rank, MemoryMruData::CALLOUT_RANK ); + io_sc.service_data->SetCallout( memmru ); + + #ifdef __HOSTBOOT_MODULE + // get data bundle from chip + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + uint8_t dram = symbol.getDram(); + + // Increment the count and check threshold. + if ( db->getImpeThresholdCounter()->inc(rank, dram, io_sc) ) + { + // Make the error log predictive if DRAM Repairs are disabled or if + // the number of DRAMs on this rank with IMPEs has reached threshold + if ( areDramRepairsDisabled() || + db->getImpeThresholdCounter()->queryDrams(rank, dram, io_sc) ) + { + io_sc.service_data->setServiceCall(); + } + else // Otherwise, place a chip mark on the failing DRAM. + { + MemMark chipMark( trgt, rank, galois ); + o_rc = MarkStore::writeChipMark<TYPE_OCMB_CHIP>( i_chip, rank, + chipMark ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + + o_rc = MarkStore::chipMarkCleanup<TYPE_OCMB_CHIP>( i_chip, rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "chipMarkCleanup(0x%08x,0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + } + } + + // If a predictive callout is made, mask both mainline and maintenance + // attentions. 
+ if ( io_sc.service_data->queryServiceCall() ) + { + SCAN_COMM_REGISTER_CLASS * mask + = i_chip->getRegister( "RDFFIR_MASK_OR" ); + mask->SetBit(19); // mainline + mask->SetBit(39); // maintenance + o_rc = mask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_MASK_OR: " + "0x%08x", i_chip->getHuid() ); + break; + } + } + #endif // __HOSTBOOT_MODULE + + } while (0); + + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<> diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H index 735ae436f..0fd71dd8b 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -51,7 +51,7 @@ namespace MemEcc * @brief Adds the memory CE to the callout list and CE table. Will also issue * dynamic memory deallocation when appropriate. Returns true if TPS is * required. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_addr Failed address. * @param i_symbol Failed symbol. * @param o_doTps True if TPS is required. False otherwise. @@ -74,7 +74,7 @@ uint32_t handleMemCe( ExtensibleChip * i_chip, const MemAddr & i_addr, * of the DIMMs, the UE table will not be updated and no dynamic memory * deallocation. * - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_addr Failed address. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -96,7 +96,7 @@ uint32_t handleMemUe( ExtensibleChip * i_chip, const MemAddr & i_addr, * the port failure is issued in the PostAnalysis plugin after the error log has * been committed. 
* - * @param i_chip MCA chip. + * @param i_chip MCA or OCMB chip. * @param i_rank Rank containing the IUE. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. @@ -107,7 +107,7 @@ uint32_t handleMemIue( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Handles a MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_addr Failed address. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -119,7 +119,7 @@ uint32_t handleMpe( ExtensibleChip * i_chip, const MemAddr & i_addr, /** * @brief Handles a MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_rank Target rank. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -135,7 +135,7 @@ uint32_t handleMpe( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Analyzes a fetch MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_rank Target rank. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. @@ -146,7 +146,7 @@ uint32_t analyzeFetchMpe( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Analyzes a fetch NCE/TCE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -156,7 +156,7 @@ uint32_t analyzeFetchNceTce( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch UE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -166,7 +166,7 @@ uint32_t analyzeFetchUe( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch mainline IUE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. 
* @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -177,7 +177,7 @@ uint32_t analyzeMainlineIue( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch maint IUE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -187,7 +187,7 @@ uint32_t analyzeMaintIue( ExtensibleChip * i_chip, /** * @brief Analyzes a maint or mainline IMPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -208,7 +208,7 @@ uint32_t analyzeFetchRcePue( ExtensibleChip * i_chip, /** * @brief Will trigger a port fail. - * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise */ template<TARGETING::TYPE T> @@ -221,7 +221,7 @@ uint32_t triggerPortFail( ExtensibleChip * i_chip ); /** * @brief Will query the data bundle and return if the IUE threshold has been * reached. - * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @param io_sc The step code data struct. * @return True if IUE threshold is reached, false if not. */ @@ -231,7 +231,7 @@ bool queryIueTh( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); /** * @brief Will mask off an entire memory port. At runtime will issue dynamic * memory deallocation of the port. 
- * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise */ template<TARGETING::TYPE T> diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H index 08b79922e..7bcf0e573 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H @@ -88,7 +88,7 @@ PRDR_ERROR_SIGNATURE(VttLost, 0xffff0084, "", "NVDIMM VTT Lost"); PRDR_ERROR_SIGNATURE(NotSelfRefr, 0xffff0085, "", "NVDIMM Dram Not Self Refresh"); PRDR_ERROR_SIGNATURE(CtrlHwErr, 0xffff0086, "", "NVDIMM Controller Hardware Error"); PRDR_ERROR_SIGNATURE(NvmCtrlErr, 0xffff0087, "", "NVDIMM NVM Controller Error"); -PRDR_ERROR_SIGNATURE(NvmLifeErr, 0xffff0088, "", "NVDIMM NVM Lifetime Error"); +PRDR_ERROR_SIGNATURE(NvmLifeErr, 0xffff0088, "", "NVDIMM Final NVM Lifetime Error"); PRDR_ERROR_SIGNATURE(InsuffEnergy, 0xffff0089, "", "NVDIMM Not enough energy for CSAVE"); PRDR_ERROR_SIGNATURE(InvFwErr, 0xffff008A, "", "NVDIMM Invalid Firmware Error"); @@ -98,8 +98,22 @@ PRDR_ERROR_SIGNATURE(EsPolNotSet, 0xffff008D, "", "NVDIMM Energy Source Policy PRDR_ERROR_SIGNATURE(EsHwFail, 0xffff008E, "", "NVDIMM Energy Source Hardware Fail"); PRDR_ERROR_SIGNATURE(EsHlthAssess, 0xffff008F, "", "NVDIMM Energy Source Health Assessment Error"); -PRDR_ERROR_SIGNATURE(EsLifeErr, 0xffff0090, "", "NVDIMM Energy Source Lifetime Error"); -PRDR_ERROR_SIGNATURE(EsTmpErr, 0xffff0091, "", "NVDIMM Energy Source Temp Error"); +PRDR_ERROR_SIGNATURE(EsLifeErr, 0xffff0090, "", "NVDIMM Final Energy Source Lifetime Error"); +PRDR_ERROR_SIGNATURE(EsTmpErrHigh, 0xffff0091, "", "NVDIMM Energy Source Temperature Error - High Temp Threshold"); +PRDR_ERROR_SIGNATURE(EsTmpErrLow, 0xffff0092, "", "NVDIMM Energy Source Temperature Error - Low Temp Threshold"); + +PRDR_ERROR_SIGNATURE(NvmLifeWarn1, 0xffff0093, "", "NVDIMM First NVM Lifetime Warning"); 
+PRDR_ERROR_SIGNATURE(NvmLifeWarn2, 0xffff0094, "", "NVDIMM Second NVM Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsLifeWarn1, 0xffff0095, "", "NVDIMM First Energy Source Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsLifeWarn2, 0xffff0096, "", "NVDIMM Second Energy Source Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsTmpWarnHigh, 0xffff0097, "", "NVDIMM Energy Source Temperature Warning - High Temp Threshold"); +PRDR_ERROR_SIGNATURE(EsTmpWarnLow, 0xffff0098, "", "NVDIMM Energy Source Temperature Warning - Low Temp Threshold"); +PRDR_ERROR_SIGNATURE(BelowWarnTh, 0xffff0099, "", "NVDIMM Below Warning Threshold"); +PRDR_ERROR_SIGNATURE(IntNvdimmErr, 0xffff009A, "", "NVDIMM Intermittent error"); +PRDR_ERROR_SIGNATURE(NotifStatErr, 0xffff009B, "", "NVDIMM Set Event Notification Status Error"); +PRDR_ERROR_SIGNATURE(FirEvntGone, 0xffff009C, "", "NVDIMM Event Triggering the FIR no longer present"); +PRDR_ERROR_SIGNATURE(EsTmpWarnFa, 0xffff009D, "", "NVDIMM Energy Source Temperature Warning - False Alarm"); +PRDR_ERROR_SIGNATURE(EsTmpErrFa, 0xffff009E, "", "NVDIMM Energy Source Temperature Error - False Alarm"); #endif // __prdfMemExtraSig_H diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C index 83bff1876..e43d844c4 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C @@ -46,7 +46,7 @@ namespace MarkStore { //############################################################################## -// Utilities to read/write markstore (MCA) +// Utilities to read/write markstore //############################################################################## // - We have the ability to set chip marks via the FWMSx registers, but there @@ -62,15 +62,19 @@ namespace MarkStore // mark per master rank. This matches the P8 behavior. This could be improved // upon later if we have the time, but doubtful. 
// - Summary: -// - Chip marks will use HWMS0-7 registers (0x07010AD0-0x07010AD7). -// - Symbol marks will use FWMS0-7 registers (0x07010AD8-0x07010ADF). +// - Chip marks will use HWMS0-7 registers: +// Nimbus: (0x07010AD0-0x07010AD7) +// Axone: (0x08011C10-0x08011C17) +// - Symbol marks will use FWMS0-7 registers: +// Nimbus: (0x07010AD8-0x07010ADF) +// Axone: (0x08011C18-0x08011C1F) // - Each register maps to master ranks 0-7. -template<> -uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + MemMark & o_mark ) { - #define PRDF_FUNC "[readChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[readChipMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -110,14 +114,21 @@ uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeChipMark<T>] " PRDF_ASSERT( i_mark.isValid() ); @@ -153,13 +164,21 @@ uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + 
//------------------------------------------------------------------------------ -template<> -uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearChipMark<T>] " uint32_t o_rc = SUCCESS; @@ -185,13 +204,20 @@ uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template +uint32_t clearChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //------------------------------------------------------------------------------ -template<> -uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readSymbolMark( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ) { - #define PRDF_FUNC "[readSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[readSymbolMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -247,14 +273,21 @@ uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeSymbolMark<T>] " PRDF_ASSERT( i_mark.isValid() 
); @@ -294,36 +327,47 @@ uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, msName, i_chip->getHuid() ); } - // Nimbus symbol mark performance workaround - // When a symbol mark is placed at runtime - #ifdef __HOSTBOOT_RUNTIME + // Nimbus only symbol mark performance workaround + if ( T == TYPE_MCA ) + { + // When a symbol mark is placed at runtime + #ifdef __HOSTBOOT_RUNTIME - // Trigger WAT logic to 'disable bypass' - // Get the ECC Debug/WAT Control register - SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); + // Trigger WAT logic to 'disable bypass' + // Get the ECC Debug/WAT Control register + SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); - // Set DBGR[8] = 0b1 - dbgr->SetBit( 8 ); - o_rc = dbgr->Write(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", - i_chip->getHuid() ); + // Set DBGR[8] = 0b1 + dbgr->SetBit( 8 ); + o_rc = dbgr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", + i_chip->getHuid() ); + } + #endif } - #endif return o_rc; #undef PRDF_FUNC } +template +uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearSymbolMark<T>] " uint32_t o_rc = SUCCESS; @@ -349,6 +393,13 @@ uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template 
+uint32_t clearSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //############################################################################## // Utilities to read/write markstore (MBA) //############################################################################## @@ -958,7 +1009,7 @@ void __addCallout( ExtensibleChip * i_chip, const MemRank & i_rank, //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, +uint32_t __addRowRepairCallout( TargetHandle_t i_trgt, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -967,7 +1018,7 @@ uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, uint32_t o_rc = SUCCESS; // Get the dimms on this rank on either port. - TargetHandleList dimmList = getConnectedDimms( i_chip->getTrgt(), i_rank ); + TargetHandleList dimmList = getConnectedDimms( i_trgt, i_rank ); // Check for row repairs on each dimm. 
for ( auto const & dimm : dimmList ) @@ -1073,8 +1124,8 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, __addCallout( i_chip, i_rank, ecc, io_sc ); // Add the row repairs to the callout list if they exist - o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( i_chip, i_rank, - io_sc ); + o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( + i_chip->getTrgt(), i_rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " @@ -1136,6 +1187,125 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t __applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + const MemMark & i_chipMark, + const MemMark & i_symMark, + TdEntry * & o_dsdEvent, + bool & o_allRepairsUsed ) +{ + #define PRDF_FUNC "[__applyRasPolicies<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + do + { + const uint8_t ps = i_chipMark.getSymbol().getPortSlct(); + const uint8_t dram = i_chipMark.getSymbol().getDram(); + + TargetHandle_t memPort = getConnectedChild( i_chip->getTrgt(), + TYPE_MEM_PORT, ps ); + + TargetHandle_t dimmTrgt = getConnectedDimm( memPort, i_rank, ps ); + + const bool isX4 = isDramWidthX4( dimmTrgt ); + + // Determine if DRAM sparing is enabled. + bool isEnabled = false; + o_rc = isDramSparingEnabled<TYPE_MEM_PORT>( memPort, i_rank, ps, + isEnabled ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "isDramSparingEnabled() failed." ); + break; + } + + if ( isEnabled ) + { + // Sparing is enabled. Get the current spares in hardware. + MemSymbol sp0, sp1, ecc; + o_rc = mssGetSteerMux<TARGETING::TYPE_OCMB_CHIP>( i_chip->getTrgt(), + i_rank, sp0, sp1, + ecc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed", + i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // Add the spares to the callout list if they exist. 
+ __addCallout( i_chip, i_rank, sp0, io_sc ); + __addCallout( i_chip, i_rank, sp1, io_sc ); + __addCallout( i_chip, i_rank, ecc, io_sc ); + + // Add the row repairs to the callout list if they exist + o_rc = __addRowRepairCallout<TARGETING::TYPE_OCMB_CHIP>( memPort, + i_rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " + "failed.", i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // If the chip mark is on a spare then the spare is bad and hardware + // can not steer it to another DRAM even if one is available (e.g. + // the ECC spare). In this case, make error log predictive. + if ( ( (0 == ps) && sp0.isValid() && (dram == sp0.getDram()) ) || + ( (1 == ps) && sp1.isValid() && (dram == sp1.getDram()) ) || + ( isX4 && ecc.isValid() && (dram == ecc.getDram()) ) ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_VcmBadSpare ); + break; // Nothing more to do. + } + + // Certain DIMMs may have had spares intentionally made unavailable by + // the manufacturer. Check the VPD for available spares. + bool spAvail, eccAvail; + o_rc = isSpareAvailable<TYPE_MEM_PORT>( memPort, i_rank, + ps, spAvail, eccAvail ); + if ( spAvail ) + { + // A spare DRAM is available. + o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark }; + } + else if ( eccAvail ) + { + // The ECC spare is available. + o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark, true }; + } + else + { + // Chip mark is in place and sparing is not possible. + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + } + // There is no DRAM sparing so simply check if both the chip and symbol + // mark have been used. 
+ else if ( i_chipMark.isValid() && i_symMark.isValid() ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1220,6 +1390,9 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank, { io_sc.service_data->setServiceCall(); + // We want to try to avoid garding NVDIMMs, so clear gard for them now. + io_sc.service_data->clearNvdimmMruListGard(); + #ifdef __HOSTBOOT_RUNTIME // No more repairs left so no point doing any more TPS procedures. MemDbUtils::banTps<T>( i_chip, i_rank ); @@ -1241,6 +1414,11 @@ uint32_t applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc, TdEntry * & o_dsdEvent ); +template +uint32_t applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + TdEntry * & o_dsdEvent ); //------------------------------------------------------------------------------ @@ -1290,7 +1468,8 @@ uint32_t chipMarkCleanup( ExtensibleChip * i_chip, const MemRank & i_rank, // Set the chip mark in the DRAM Repairs VPD. 
if ( !areDramRepairsDisabled() ) { - o_rc = setDramInVpd( i_chip, i_rank, chipMark.getSymbol() ); + o_rc = setDramInVpd( i_chip->getTrgt(), i_rank, + chipMark.getSymbol() ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed", @@ -1314,6 +1493,10 @@ template uint32_t chipMarkCleanup<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t chipMarkCleanup<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); #endif // not supported on FSP diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H index 2cd28b8dd..86ffa1dc9 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -57,7 +57,7 @@ class MemMark /** * @brief Constructor from components. - * @param i_trgt MBA or MCA target. + * @param i_trgt MBA, MCA, or OCMB target. * @param i_rank The rank this mark is on. * @param i_galois The Galois field. */ @@ -68,7 +68,7 @@ class MemMark /** * @brief Constructor from components. - * @param i_trgt MBA or MCA target. + * @param i_trgt MBA, MCA, or OCMB target. * @param i_rank The rank this mark is on. * @param i_symbol The symbol representing this mark. */ @@ -112,7 +112,7 @@ namespace MarkStore /** * @brief Reads markstore and returns the chip mark for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param o_mark The returned chip mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. 
@@ -123,7 +123,7 @@ uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Writes a chip mark into markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param i_mark Target chip mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -134,7 +134,7 @@ uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Clear chip mark in markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ @@ -143,7 +143,7 @@ uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank ); /** * @brief Reads markstore and returns the symbol mark for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param o_mark The returned symbol mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -154,7 +154,7 @@ uint32_t readSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Writes a symbol mark into markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param i_mark Target symbol mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -165,7 +165,7 @@ uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Clear symbol mark in markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ @@ -187,7 +187,7 @@ uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank ); * repairs have been used. * - Returns a new DsdEvent if DRAM sparing is available. 
* - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param io_sc The step code data struct. * @param o_dsdEvent A new DsdEvent if DRAM sparing is available. Otherwise, @@ -211,7 +211,7 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank, * - Sets the DRAM in the DRAM Repair VPD if DRAM repairs. * - Adds a DSD procedure to the TD queue if a DRAM spare is available * - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C b/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C index 8ebe6cea8..3ff6cd099 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -126,6 +126,22 @@ uint32_t getRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, o_rowRepair ); } +template<> +uint32_t getRowRepairData<TYPE_MEM_PORT>( TargetHandle_t i_dimm, + const MemRank & i_rank, MemRowRepair & o_rowRepair ) +{ + return __getRowRepairData<TYPE_MEM_PORT, fapi2::TARGET_TYPE_MEM_PORT>( + i_dimm, i_rank, o_rowRepair ); +} + +template<> +uint32_t getRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, MemRowRepair & o_rowRepair ) +{ + return __getRowRepairData<TYPE_OCMB_CHIP, fapi2::TARGET_TYPE_OCMB_CHIP>( + i_dimm, i_rank, o_rowRepair ); +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T, fapi2::TargetType F> @@ -190,34 +206,19 @@ uint32_t setRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, i_rowRepair ); } -//------------------------------------------------------------------------------ - -template<TARGETING::TYPE T> -void __setRowRepairDataHelper( const MemAddr & i_addr, uint32_t & io_tmp ); - template<> -void __setRowRepairDataHelper<TYPE_MBA>( const MemAddr & i_addr, - uint32_t & io_tmp ) +uint32_t setRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, + const MemRowRepair & i_rowRepair ) { - #ifdef __HOSTBOOT_MODULE - - // Bank is stored as MBA "(DDR4): bg1-bg0,b1-b0 (4-bit)" in a MemAddr. - // bank group - 2 bits (bg1-bg0) - io_tmp = ( io_tmp << 2 ) | ( (i_addr.getBank() >> 2) & 0x03 ); - - // bank - 3 bits (b2-b0) - io_tmp = ( io_tmp << 3 ) | ( i_addr.getBank() & 0x03 ); - - // Row is stored as "MBA: r17-r0 (18-bit)" in a MemAddr. 
- // row - 18 bits (r17-r0) - io_tmp = ( io_tmp << 18 ) | ( i_addr.getRow() & 0x0003ffff ); - - #endif // __HOSTBOOT_MODULE + return __setRowRepairData<TYPE_OCMB_CHIP, fapi2::TARGET_TYPE_OCMB_CHIP>( + i_dimm, i_rank, i_rowRepair ); } -template<> -void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, - uint32_t & io_tmp ) +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +void __setRowRepairDataHelper( const MemAddr & i_addr, uint32_t & io_tmp ) { #ifdef __HOSTBOOT_MODULE @@ -242,6 +243,32 @@ void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, #endif // __HOSTBOOT_MODULE } +template +void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, + uint32_t & io_tmp ); +template +void __setRowRepairDataHelper<TYPE_OCMB_CHIP>( const MemAddr & i_addr, + uint32_t & io_tmp ); + +template<> +void __setRowRepairDataHelper<TYPE_MBA>( const MemAddr & i_addr, + uint32_t & io_tmp ) +{ + #ifdef __HOSTBOOT_MODULE + + // Bank is stored as MBA "(DDR4): bg1-bg0,b1-b0 (4-bit)" in a MemAddr. + // bank group - 2 bits (bg1-bg0) + io_tmp = ( io_tmp << 2 ) | ( (i_addr.getBank() >> 2) & 0x03 ); + + // bank - 3 bits (b2-b0) + io_tmp = ( io_tmp << 3 ) | ( i_addr.getBank() & 0x03 ); + + // Row is stored as "MBA: r17-r0 (18-bit)" in a MemAddr. 
+ // row - 18 bits (r17-r0) + io_tmp = ( io_tmp << 18 ) | ( i_addr.getRow() & 0x0003ffff ); + + #endif // __HOSTBOOT_MODULE +} //------------------------------------------------------------------------------ @@ -297,7 +324,7 @@ uint32_t setRowRepairData( TargetHandle_t i_dimm, MemRowRepair l_rowRepair( i_dimm, i_rank, l_data ); - o_rc = setRowRepairData<TYPE_MBA>( i_dimm, i_rank, l_rowRepair ); + o_rc = setRowRepairData<T>( i_dimm, i_rank, l_rowRepair ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setRowRepairData() failed" ); @@ -323,6 +350,11 @@ uint32_t setRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, const MemRank & i_rank, const MemAddr & i_addr, uint8_t i_dram ); +template +uint32_t setRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, + const MemAddr & i_addr, + uint8_t i_dram ); //------------------------------------------------------------------------------ @@ -362,6 +394,9 @@ uint32_t clearRowRepairData<TYPE_MBA>( TargetHandle_t i_dimm, template uint32_t clearRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, const MemRank & i_rank ); +template +uint32_t clearRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank ); //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C index 561c11dda..d58d6a177 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C @@ -53,7 +53,7 @@ MemSymbol::MemSymbol( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, PRDF_ASSERT( nullptr != i_trgt ); PRDF_ASSERT( TYPE_MBA == getTargetType(i_trgt) || TYPE_MCA == getTargetType(i_trgt) || - TYPE_MEM_PORT == getTargetType(i_trgt) ); + TYPE_OCMB_CHIP == getTargetType(i_trgt) ); // Allowing an invalid symbol. Use isValid() to check validity. 
PRDF_ASSERT( i_pins <= CEN_SYMBOL::BOTH_SYMBOL_DQS ); } @@ -83,9 +83,9 @@ MemSymbol MemSymbol::fromGalois( TargetHandle_t i_trgt, const MemRank & i_rank, if ( 0 != (i_mask & 0xaa) ) pins |= EVEN_SYMBOL_DQ; if ( 0 != (i_mask & 0x55) ) pins |= ODD_SYMBOL_DQ; } - else if ( TYPE_MCA == trgtType || TYPE_MEM_PORT == trgtType ) + else if ( TYPE_MCA == trgtType || TYPE_OCMB_CHIP == trgtType ) { - // 1 pin for MCA/MEM_PORT. + // 1 pin for MCA/TYPE_OCMB_CHIP. if ( 0 != (i_mask & 0xff) ) pins |= ODD_SYMBOL_DQ; } else @@ -112,9 +112,9 @@ uint8_t MemSymbol::getDq() const { dq = symbol2Dq<TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dq = symbol2Dq<TYPE_MEM_PORT>( iv_symbol ); + dq = symbol2Dq<TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -140,9 +140,9 @@ uint8_t MemSymbol::getPortSlct() const { portSlct = symbol2PortSlct<TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - portSlct = symbol2PortSlct<TYPE_MEM_PORT>( iv_symbol ); + portSlct = symbol2PortSlct<TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -159,22 +159,26 @@ uint8_t MemSymbol::getDram() const { uint8_t dram = 0; TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; if ( TYPE_MBA == trgtType ) { + isX4 = isDramWidthX4( iv_trgt ); dram = isX4 ? symbol2Nibble<TYPE_MBA>( iv_symbol ) : symbol2Byte <TYPE_MBA>( iv_symbol ); } else if ( TYPE_MCA == trgtType ) { + isX4 = isDramWidthX4( iv_trgt ); dram = isX4 ? symbol2Nibble<TYPE_MCA>( iv_symbol ) : symbol2Byte <TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dram = isX4 ? symbol2Nibble<TYPE_MEM_PORT>( iv_symbol ) - : symbol2Byte <TYPE_MEM_PORT>( iv_symbol ); + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + dram = isX4 ? 
symbol2Nibble<TYPE_OCMB_CHIP>( iv_symbol ) + : symbol2Byte <TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -200,14 +204,24 @@ uint8_t MemSymbol::getDramRelCenDqs() const const uint8_t X4_DRAM_SPARE_UPPER = 19; const uint8_t X8_DRAM_SPARE = 9; + bool isX4 = true; + if ( TYPE_OCMB_CHIP == getTargetType(iv_trgt) ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } - uint8_t l_dramWidth = ( isDramWidthX4(iv_trgt) ) ? 4 : 8; + uint8_t l_dramWidth = ( isX4 ) ? 4 : 8; uint8_t l_dram = getDq() / l_dramWidth; // (x8: 0-9, x4: 0-19) // Adjust for spares if ( isDramSpared() ) { - if ( isDramWidthX4(iv_trgt) ) + if ( isX4 ) { uint8_t l_bit = getDq() % DQS_PER_BYTE; l_dram = ( l_bit < 4 ) ? X4_DRAM_SPARE_LOWER : X4_DRAM_SPARE_UPPER; @@ -219,7 +233,7 @@ uint8_t MemSymbol::getDramRelCenDqs() const } else if ( isEccSpared() ) { - l_dram = ( isDramWidthX4(iv_trgt) ) ? X4_ECC_SPARE : X8_ECC_SPARE; + l_dram = ( isX4 ) ? X4_ECC_SPARE : X8_ECC_SPARE; } return l_dram; @@ -231,7 +245,16 @@ uint8_t MemSymbol::getDramRelCenDqs() const uint8_t MemSymbol::getDramPins() const { TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; + if ( TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } uint32_t dps = 0; uint32_t spd = 0; @@ -241,7 +264,7 @@ uint8_t MemSymbol::getDramPins() const dps = MBA_DQS_PER_SYMBOL; spd = isX4 ? MBA_SYMBOLS_PER_NIBBLE : MBA_SYMBOLS_PER_BYTE; } - else if ( TYPE_MCA == trgtType || TYPE_MEM_PORT == trgtType ) + else if ( TYPE_MCA == trgtType || TYPE_OCMB_CHIP == trgtType ) { dps = MEM_DQS_PER_SYMBOL; spd = isX4 ? 
MEM_SYMBOLS_PER_NIBBLE : MEM_SYMBOLS_PER_BYTE; @@ -261,7 +284,16 @@ uint8_t MemSymbol::getDramSymbol() const { uint8_t dramSymbol = SYMBOLS_PER_RANK; TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; + if ( TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } uint8_t dram = getDram(); if ( TYPE_MBA == trgtType ) @@ -274,10 +306,10 @@ uint8_t MemSymbol::getDramSymbol() const dramSymbol = isX4 ? nibble2Symbol<TYPE_MCA>( dram ) : byte2Symbol <TYPE_MCA>( dram ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dramSymbol = isX4 ? nibble2Symbol<TYPE_MEM_PORT>( dram ) - : byte2Symbol <TYPE_MEM_PORT>( dram ); + dramSymbol = isX4 ? nibble2Symbol<TYPE_OCMB_CHIP>( dram ) + : byte2Symbol <TYPE_OCMB_CHIP>( dram ); } else { @@ -435,16 +467,16 @@ uint32_t getMemReadSymbol<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MemSymbol & o_sym1, - MemSymbol & o_sym2 ) +uint32_t getMemReadSymbol<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemSymbol & o_sym1, + MemSymbol & o_sym2 ) { - #define PRDF_FUNC "[getMemReadSymbol<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[getMemReadSymbol<TYPE_OCMB_CHIP>] " // Check parameters PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -453,14 +485,12 @@ uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, do { // Get the NCE/TCE galois and mask from hardware. 
- ExtensibleChip * ocmbChip = getConnectedParent(i_chip, TYPE_OCMB_CHIP); - - SCAN_COMM_REGISTER_CLASS * reg = ocmbChip->getRegister("MBSEVR0"); + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister("MBSEVR0"); o_rc = reg->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on MBSEVR0: " - "ocmbChip=0x%08x", ocmbChip->getHuid() ); + "i_chip=0x%08x", i_chip->getHuid() ); break; } @@ -480,8 +510,8 @@ uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, tceGalois, tceMask ); MemSymbol sp0, sp1, ecc; - o_rc = mssGetSteerMux<TYPE_MEM_PORT>( i_chip->getTrgt(), i_rank, - sp0, sp1, ecc ); + o_rc = mssGetSteerMux<TYPE_OCMB_CHIP>( i_chip->getTrgt(), i_rank, + sp0, sp1, ecc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed. HUID: 0x%08x " diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H index c16972fd8..00b0c7cfd 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H @@ -79,7 +79,7 @@ class MemSymbol /** * @brief Creates a MemSymbol from a symbol. - * @param i_trgt MBA, MCA, or MEM_PORT target. + * @param i_trgt MBA, MCA, or OCMB_CHIP target. * @param i_rank The rank this symbol is on. * @param i_symbol The input symbol. * @param i_pins See enum DqMask. @@ -95,7 +95,7 @@ class MemSymbol /** * @brief Creates a MemSymbol from a Galois field. - * @param i_trgt MBA, MCA, or MEM_PORT target. + * @param i_trgt MBA, MCA, or OCMB_CHIP target. * @param i_rank The rank this symbol is on. * @param i_galois The Galois field. * @param i_mask The bit mask. @@ -122,7 +122,7 @@ class MemSymbol MemRank getRank() const { return iv_rank; }; /** @return The port select for this symbol. Only relevant on MBA. Will - * always return 0 for MCA and MEM_PORT. */ + * always return 0 for MCA and OCMB. */ uint8_t getPortSlct() const; /** @return The DRAM index for this symbol. 
*/ @@ -218,7 +218,7 @@ class MemSymbol /** * @brief Reads the memory NCE/TCE vector trap register from hardware. - * @param i_chip MCA, MBA, or MEM_PORT. + * @param i_chip MCA, MBA, or OCMB_CHIP. * @param i_rank The rank this symbol is on. * @param o_sym1 The first symbol. Should always be valid for both NCE/TCE. * @param o_sym2 The second symbol. Only valid for TCEs. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C index f6403f219..f9c73b739 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C @@ -173,7 +173,8 @@ void getMnfgMemCeTh( ExtensibleChip * i_chip, const MemRank & i_rank, else { // Get DRAM size - uint8_t size = MemUtils::getDramSize<T>( i_chip, i_rank.getDimmSlct() ); + uint8_t size = MemUtils::getDramSize<T>( i_chip->getTrgt(), + i_rank.getDimmSlct() ); // Get number of ranks per DIMM select. uint8_t rankCount = getNumRanksPerDimm<T>( i_chip->getTrgt(), @@ -209,7 +210,7 @@ void getMnfgMemCeTh<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, uint32_t & o_cePerDimm ); template -void getMnfgMemCeTh<TYPE_MEM_PORT>( ExtensibleChip * i_chip, +void getMnfgMemCeTh<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, const MemRank & i_rank, uint32_t & o_cePerDram, uint32_t & o_cePerRank, uint32_t & o_cePerDimm ); @@ -236,14 +237,8 @@ uint32_t getScrubCeThreshold( ExtensibleChip * i_chip, const MemRank & i_rank ) // need these templates to avoid linker errors template -uint32_t getScrubCeThreshold<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ); -template uint32_t getScrubCeThreshold<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank ); -template -uint32_t getScrubCeThreshold<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank ); } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C index 
744e55e69..64677f1ae 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2019 */ +/* Contributors Listed Below - COPYRIGHT 2013,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -31,11 +31,14 @@ // Framework includes #include <iipServiceDataCollector.h> +#include <iipSystem.h> #include <prdfExtensibleChip.H> +#include <prdfGlobal_common.H> #include <UtilHash.H> // Platform includes #include <prdfCenMbaDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfCenMembufDataBundle.H> #include <prdfCenMembufExtraSig.H> #include <prdfMemSymbol.H> @@ -224,12 +227,12 @@ int32_t collectCeStats<TYPE_MCA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MaintSymbols & o_maintStats, - MemSymbol & o_chipMark, uint8_t i_thr ) +int32_t collectCeStats<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MaintSymbols & o_maintStats, + MemSymbol & o_chipMark, uint8_t i_thr ) { - #define PRDF_FUNC "[MemUtils::collectCeStats<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[MemUtils::collectCeStats<TYPE_OCMB_CHIP>] " int32_t o_rc = SUCCESS; o_chipMark = MemSymbol(); // Initially invalid. 
@@ -238,10 +241,13 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, { PRDF_ASSERT( 0 != i_thr ); - TargetHandle_t memPortTrgt = i_chip->getTrgt(); - ExtensibleChip * ocmbChip = getConnectedParent(i_chip, TYPE_OCMB_CHIP); + TargetHandle_t ocmbTrgt = i_chip->getTrgt(); - const bool isX4 = isDramWidthX4(memPortTrgt); + // TODO RTC 210072 - support for multiple ports + TargetHandle_t memPortTrgt = getConnectedChild( ocmbTrgt, + TYPE_MEM_PORT, 0 ); + TargetHandle_t dimm = getConnectedDimm( memPortTrgt, i_rank ); + const bool isX4 = isDramWidthX4( dimm ); // Use this map to keep track of the total counts per DRAM. DramCountMap dramCounts; @@ -252,7 +258,7 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_PORT; regIdx++ ) { reg_str = ocmbCeStatReg[regIdx]; - reg = ocmbChip->getRegister( reg_str ); + reg = i_chip->getRegister( reg_str ); o_rc = reg->Read(); if ( SUCCESS != o_rc ) @@ -272,8 +278,8 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, uint8_t sym = baseSymbol + i; PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); - uint8_t dram = isX4 ? symbol2Nibble<TYPE_MEM_PORT>( sym ) - : symbol2Byte <TYPE_MEM_PORT>( sym ); + uint8_t dram = isX4 ? symbol2Nibble<TYPE_OCMB_CHIP>( sym ) + : symbol2Byte <TYPE_OCMB_CHIP>( sym ); // Keep track of the total DRAM counts. dramCounts[dram].totalCount += count; @@ -286,7 +292,7 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, dramCounts[dram].symbolCount++; SymbolData symData; - symData.symbol = MemSymbol::fromSymbol( memPortTrgt, i_rank, + symData.symbol = MemSymbol::fromSymbol( ocmbTrgt, i_rank, sym, CEN_SYMBOL::ODD_SYMBOL_DQ ); if ( !symData.symbol.isValid() ) { @@ -329,11 +335,11 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, if ( 0 != highestCount ) { - uint8_t sym = isX4 ? nibble2Symbol<TYPE_MEM_PORT>( highestDram ) - : byte2Symbol <TYPE_MEM_PORT>( highestDram ); + uint8_t sym = isX4 ? 
nibble2Symbol<TYPE_OCMB_CHIP>( highestDram ) + : byte2Symbol <TYPE_OCMB_CHIP>( highestDram ); PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); - o_chipMark = MemSymbol::fromSymbol( memPortTrgt, i_rank, sym ); + o_chipMark = MemSymbol::fromSymbol( ocmbTrgt, i_rank, sym ); } } while(0); @@ -514,19 +520,18 @@ int32_t collectCeStats<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint8_t getDramSize<TYPE_MCA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( TYPE_MCA == getTargetType(i_trgt) ); PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); - TargetHandle_t mcaTrgt = i_chip->getTrgt(); - TargetHandle_t mcsTrgt = getConnectedParent( mcaTrgt, TYPE_MCS ); + TargetHandle_t mcsTrgt = getConnectedParent( i_trgt, TYPE_MCS ); PRDF_ASSERT( nullptr != mcsTrgt ); - uint8_t mcaRelPos = i_chip->getPos() % MAX_MCA_PER_MCS; + uint8_t mcaRelPos = getTargetPosition(i_trgt) % MAX_MCA_PER_MCS; uint8_t tmp[MAX_MCA_PER_MCS][DIMM_SLCT_PER_PORT]; @@ -542,19 +547,22 @@ uint8_t getDramSize<TYPE_MCA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) } template<> -uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + PRDF_ASSERT( TYPE_MBA == getTargetType(i_trgt) ); uint8_t o_size = 0; do { - ExtensibleChip * membufChip = getConnectedParent(i_chip, TYPE_MEMBUF); + TargetHandle_t membuf = getConnectedParent(i_trgt, TYPE_MEMBUF); + ExtensibleChip * membufChip = + (ExtensibleChip*)systemPtr->GetChip(membuf); + PRDF_ASSERT( nullptr != membufChip ); - uint32_t pos = i_chip->getPos(); + uint32_t pos = getTargetPosition(i_trgt); const char * reg_str = (0 == pos) ? 
"MBA0_MBAXCR" : "MBA1_MBAXCR"; SCAN_COMM_REGISTER_CLASS * reg = membufChip->getRegister( reg_str ); @@ -562,7 +570,7 @@ uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) if ( SUCCESS != rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on %s. Target=0x%08x", - reg_str, i_chip->getHuid() ); + reg_str, getHuid(i_trgt) ); break; } @@ -579,18 +587,16 @@ uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) } template<> -uint8_t getDramSize<TYPE_MEM_PORT>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MEM_PORT>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_MEM_PORT == getTargetType(i_trgt) ); PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); - TargetHandle_t memPortTrgt = i_chip->getTrgt(); - uint8_t tmp[DIMM_SLCT_PER_PORT]; - if ( !memPortTrgt->tryGetAttr<TARGETING::ATTR_MEM_EFF_DRAM_DENSITY>(tmp) ) + if ( !i_trgt->tryGetAttr<TARGETING::ATTR_MEM_EFF_DRAM_DENSITY>(tmp) ) { PRDF_ERR( PRDF_FUNC "Failed to get ATTR_MEM_EFF_DRAM_DENSITY" ); PRDF_ASSERT( false ); @@ -601,6 +607,25 @@ uint8_t getDramSize<TYPE_MEM_PORT>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) #undef PRDF_FUNC } +template<> +uint8_t getDramSize<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) +{ + #define PRDF_FUNC "[MemUtils::getDramSize] " + + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_trgt) ); + PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); + + // TODO RTC 210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. 
+ + TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + + return getDramSize<TYPE_MEM_PORT>( memPort, i_dimmSlct ); + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<> @@ -639,6 +664,34 @@ void cleanupChnlAttns<TYPE_MEMBUF>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +void cleanupChnlAttns<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::cleanupChnlAttns] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + // No cleanup if this is a checkstop attention. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) return; + + #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP + + // Clear the associated FIR bits for all attention types. DSTLFIR[0:7] + ExtensibleChip * mcc = getConnectedParent( i_chip, TYPE_MCC ); + + SCAN_COMM_REGISTER_CLASS * reg = mcc->getRegister( "DSTLFIR_AND" ); + + reg->setAllBits(); + reg->SetBitFieldJustified( 0, 8, 0 ); + reg->Write(); + + #endif // Hostboot only + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1288,6 +1341,361 @@ bool analyzeChnlFail<TYPE_MC>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ +bool __queryUcsOmic( ExtensibleChip * i_omic, ExtensibleChip * i_mcc, + TargetHandle_t i_omi ) +{ + PRDF_ASSERT( nullptr != i_omic ); + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMIC == i_omic->getType() ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + bool o_activeAttn = false; + + do + { + // Get the DSTLCFG2 register to check whether channel fail is enabled + // NOTE: DSTLCFG2[22] = 0b0 to enable chnl fail for subchannel A + // NOTE: DSTLCFG2[23] = 0b0 to 
enable chnl fail for subchannel B + SCAN_COMM_REGISTER_CLASS * cnfg = i_mcc->getRegister( "DSTLCFG2" ); + + // Get the position of the inputted OMI relative to the parent MCC (0-1) + // to determine which channel we need to check. + uint8_t omiPosRelMcc = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // If channel fail isn't configured, no need to continue. + if ( cnfg->IsBitSet(22 + omiPosRelMcc) ) break; + + // Check the OMIDLFIR for UCS (relevant bits: 0,20,40) + SCAN_COMM_REGISTER_CLASS * fir = i_omic->getRegister("OMIDLFIR"); + SCAN_COMM_REGISTER_CLASS * mask = i_omic->getRegister("OMIDLFIR_MASK"); + SCAN_COMM_REGISTER_CLASS * act0 = i_omic->getRegister("OMIDLFIR_ACT0"); + SCAN_COMM_REGISTER_CLASS * act1 = i_omic->getRegister("OMIDLFIR_ACT1"); + + if ( SUCCESS == ( fir->Read() | mask->Read() | + act0->Read() | act1->Read() ) ) + { + // Get the position of the inputted OMI relative to the parent + // OMIC (0-2). We'll need to use ATTR_OMI_DL_GROUP_POS for this. + uint8_t omiPosRelOmic = i_omi->getAttr<ATTR_OMI_DL_GROUP_POS>(); + + // Get the bit offset for the bit relevant to the inputted OMI. + // 0 : OMI-DL 0 + // 20: OMI-DL 1 + // 40: OMI-DL 2 + uint8_t bitOff = omiPosRelOmic * 20; + + // Check if there is a UNIT_CS for the relevant bits in the OMIDLFIR + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of those errors. 
+ if ( fir->IsBitSet(bitOff) && !mask->IsBitSet(bitOff) && + !act0->IsBitSet(bitOff) && act1->IsBitSet(bitOff) ) + { + o_activeAttn = true; + } + } + }while(0); + + return o_activeAttn; +} + +bool __queryUcsMcc( ExtensibleChip * i_mcc, TargetHandle_t i_omi ) +{ + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + bool o_activeAttn = false; + + // Get the position of the inputted OMI relative to the parent MCC (0-1) + // to determine which channel we need to check. + uint8_t omiPos = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // Maps of the DSTLFIR UCS bits to their relevant channel fail + // configuration bit in DSTLCFG2. Ex: {12,28} = DSTLFIR[12], DSTLCFG2[28] + // NOTE: there is a separate map for each subchannel. + const std::map<uint8_t,uint8_t> dstlfirMapChanA = + { {12,28}, {16,30}, {22,24} }; + + const std::map<uint8_t,uint8_t> dstlfirMapChanB = + { {13,29}, {17,31}, {23,25} }; + + // Check the DSTLFIR for UCS + SCAN_COMM_REGISTER_CLASS * fir = i_mcc->getRegister( "DSTLFIR" ); + SCAN_COMM_REGISTER_CLASS * mask = i_mcc->getRegister( "DSTLFIR_MASK" ); + SCAN_COMM_REGISTER_CLASS * act0 = i_mcc->getRegister( "DSTLFIR_ACT0" ); + SCAN_COMM_REGISTER_CLASS * act1 = i_mcc->getRegister( "DSTLFIR_ACT1" ); + SCAN_COMM_REGISTER_CLASS * cnfg = i_mcc->getRegister( "DSTLCFG2" ); + + if ( SUCCESS == (fir->Read() | mask->Read() | act0->Read() | act1->Read() | + cnfg->Read()) ) + { + // Get which relevant channel we need to check. + std::map<uint8_t,uint8_t> dstlfirMap; + dstlfirMap = (0 == omiPos) ? 
dstlfirMapChanA : dstlfirMapChanB; + + for ( auto const & bits : dstlfirMap ) + { + uint8_t firBit = bits.first; + uint8_t cnfgBit = bits.second; + + // NOTE: Channel fail is enabled if the config bit is set to 0b0 + if ( !cnfg->IsBitSet(cnfgBit) && fir->IsBitSet(firBit) && + !mask->IsBitSet(firBit) && act0->IsBitSet(firBit) && + act1->IsBitSet(firBit) ) + { + o_activeAttn = true; + } + } + } + + // Maps of the USTLFIR UCS bits to their relevant channel fail + // config bit in USTLFAILMASK. Ex: {0,54} = USTLFIR[0], USTLFAILMASK[54] + // NOTE: there is a separate map for each subchannel. + const std::map<uint8_t,uint8_t> ustlfirMapChanA = + { { 0,54}, { 2,48}, {27,56}, {35,49}, {37,50}, {39,51}, {41,52}, {43,53}, + {49,55}, {51,50}, {53,50}, {55,48}, {59,56} }; + const std::map<uint8_t,uint8_t> ustlfirMapChanB = + { { 1,54}, { 3,48}, {28,56}, {36,49}, {38,50}, {40,51}, {42,52}, {44,53}, + {50,55}, {52,50}, {54,50}, {56,48}, {60,56} }; + + // Check the USTLFIR for UCS + fir = i_mcc->getRegister( "USTLFIR" ); + mask = i_mcc->getRegister( "USTLFIR_MASK" ); + act0 = i_mcc->getRegister( "USTLFIR_ACT0" ); + act1 = i_mcc->getRegister( "USTLFIR_ACT1" ); + cnfg = i_mcc->getRegister( "USTLFAILMASK" ); + + if ( SUCCESS == (fir->Read() | mask->Read() | act0->Read() | act1->Read() | + cnfg->Read()) ) + { + // Get which relevant channel we need to check. + std::map<uint8_t,uint8_t> ustlfirMap; + ustlfirMap = (0 == omiPos) ? 
ustlfirMapChanA : ustlfirMapChanB; + + for ( auto const & bits : ustlfirMap ) + { + uint8_t firBit = bits.first; + uint8_t cnfgBit = bits.second; + + // NOTE: Channel fail is enabled if the config bit is set to 0b0 + if ( !cnfg->IsBitSet(cnfgBit) && fir->IsBitSet(firBit) && + !mask->IsBitSet(firBit) && act0->IsBitSet(firBit) && + act1->IsBitSet(firBit) ) + { + o_activeAttn = true; + } + } + } + + return o_activeAttn; +} + +bool __queryUcsOcmb( ExtensibleChip * i_ocmb ) +{ + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); + + bool o_activeAttn = false; + + // We can't use the GLOBAL_CS_FIR. It will not clear automatically when a + // channel has failed because the hardware clocks have stopped. Also, since + // it is a virtual register there really is no way to clear it. Fortunately + // we have the INTER_STATUS_REG that will tell us if there is an active + // attention. Note that we clear this register as part of the channel + // failure cleanup. So we can rely on this register to determine if there is + // a new channel failure. + + SCAN_COMM_REGISTER_CLASS * fir = i_ocmb->getRegister("INTER_STATUS_REG"); + + if ( SUCCESS == fir->Read() ) + { + o_activeAttn = fir->IsBitSet(2); // Checkstop bit. + } + + return o_activeAttn; +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +bool __analyzeChnlFail( TargetHandle_t i_trgt, + STEP_CODE_DATA_STRUCT & io_sc ); + +template<> +bool __analyzeChnlFail<TYPE_OMI>( TargetHandle_t i_omi, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::__analyzeChnlFail<TYPE_OMI>] " + + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + uint32_t o_analyzed = false; + + do + { + // Skip if currently analyzing a host attention. This is a required for + // a rare scenario when a channel failure occurs after PRD is called to + // handle the host attention. 
+ if ( HOST_ATTN == io_sc.service_data->getPrimaryAttnType() ) break; + + // Get the needed ExtensibleChips for analysis + TargetHandle_t ocmb = getConnectedChild( i_omi, TYPE_OCMB_CHIP, 0 ); + ExtensibleChip * ocmbChip = (ExtensibleChip *)systemPtr->GetChip(ocmb); + + TargetHandle_t omic = getConnectedParent( i_omi, TYPE_OMIC ); + ExtensibleChip * omicChip = (ExtensibleChip *)systemPtr->GetChip(omic); + + TargetHandle_t mcc = getConnectedParent( i_omi, TYPE_MCC ); + ExtensibleChip * mccChip = (ExtensibleChip *)systemPtr->GetChip(mcc); + + // Do an initial query for channel fail attentions from the targets. + // This is to check whether we actually have an active channel fail + // attention before checking whether it is a side effect of some + // recoverable attention or not. + if ( !__queryUcsOmic(omicChip, mccChip, i_omi) && + !__queryUcsMcc(mccChip, i_omi) && + !__queryUcsOcmb(ocmbChip) ) + { + // If no channel fail attentions found, just break out. + break; + } + + // There was a channel fail found, so take the following actions. + + // Set the MEM_CHNL_FAIL flag in the SDC to indicate a channel failure + // has been detected and there is no need to check again. + io_sc.service_data->setMemChnlFail(); + + // Make the error log predictive and set threshold. + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + + // Channel failures will always send SUEs. + io_sc.service_data->setFlag( ServiceDataCollector::UERE ); + + // Indicate cleanup is required on this channel. + getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup = true; + + // Check for recoverable attentions that could have a channel failure + // as a side effect. 
These include: N/A + // TODO RTC 243518 -requires more input from the test team to determine + + // Check OMIC for unit checkstops + if ( __queryUcsOmic( omicChip, mccChip, i_omi ) ) + { + // Analyze UNIT_CS on the OMIC chip + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of those errors. + if ( SUCCESS == omicChip->Analyze(io_sc, RECOVERABLE) ) + { + o_analyzed = true; + break; + } + } + + // Check MCC for unit checkstops + if ( __queryUcsMcc( mccChip, i_omi ) ) + { + // Analyze UNIT_CS on the MCC chip + if ( SUCCESS == mccChip->Analyze(io_sc, UNIT_CS) ) + { + o_analyzed = true; + break; + } + } + + // Check OCMB for unit checkstops + if ( __queryUcsOcmb( ocmbChip ) ) + { + // Analyze UNIT_CS on the OCMB chip + if ( SUCCESS == ocmbChip->Analyze(io_sc, UNIT_CS) ) + { + o_analyzed = true; + break; + } + + } + PRDF_INF( PRDF_FUNC "Failed channel detected on 0x%08x, but no active " + "attentions found", getHuid(i_omi) ); + }while(0); + + return o_analyzed; + + #undef PRDF_FUNC +} + +template<> +bool analyzeChnlFail<TYPE_MCC>( ExtensibleChip * i_mcc, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + + uint32_t o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + // Loop through all the connected OMIs + for ( auto & omi : getConnected(i_mcc->getTrgt(), TYPE_OMI) ) + { + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + if ( o_analyzed ) break; + } + } + + return o_analyzed; +} + +template<> +bool analyzeChnlFail<TYPE_OMIC>( ExtensibleChip * i_omic, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_omic ); + PRDF_ASSERT( TYPE_OMIC == i_omic->getType() ); + + uint32_t o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + // Loop through 
all the connected OMIs + for ( auto & omi : getConnected(i_omic->getTrgt(), TYPE_OMI) ) + { + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + if ( o_analyzed ) break; + } + } + + return o_analyzed; +} + +template<> +bool analyzeChnlFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmb, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); + + uint32_t o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + TargetHandle_t omi = getConnectedParent( i_ocmb->getTrgt(), TYPE_OMI ); + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + } + + return o_analyzed; +} + +//------------------------------------------------------------------------------ + template<TARGETING::TYPE T1, TARGETING::TYPE T2, TARGETING::TYPE T3> void __cleanupChnlFail( ExtensibleChip * i_chip1, ExtensibleChip * i_chip2, ExtensibleChip * i_chip3, @@ -1415,6 +1823,158 @@ void cleanupChnlFail<TYPE_MEMBUF>( ExtensibleChip * i_chip, cleanupChnlFail<TYPE_DMI>( dmiChip, io_sc ); } +template<TARGETING::TYPE T> +void __cleanupChnlFail( TargetHandle_t i_trgt, STEP_CODE_DATA_STRUCT & io_sc ); + +template<> +void __cleanupChnlFail<TYPE_OMI>( TargetHandle_t i_omi, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::__cleanupChnlFail] " + + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + do + { + // No cleanup if this is a checkstop attention. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) break; + + TargetHandle_t ocmb = getConnectedChild(i_omi, TYPE_OCMB_CHIP, 0); + ExtensibleChip * ocmbChip = (ExtensibleChip *)systemPtr->GetChip(ocmb); + + // Check if cleanup is still required or has already been done. + if ( !getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup ) break; + + // Cleanup is complete and no longer required on this channel. 
+ getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup = false; + + #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP + + TargetHandle_t omic = getConnectedParent( i_omi, TYPE_OMIC ); + ExtensibleChip * omicChip = (ExtensibleChip *)systemPtr->GetChip(omic); + + TargetHandle_t mcc = getConnectedParent( i_omi, TYPE_MCC ); + ExtensibleChip * mccChip = (ExtensibleChip *)systemPtr->GetChip(mcc); + + // Get the OMI position relative to the OMIC (0,1,2) and the MCC (0,1) + uint8_t omiPosRelOmic = i_omi->getAttr<ATTR_OMI_DL_GROUP_POS>(); + uint8_t omiPosRelMcc = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // Note that this is a clean up function. If there are any SCOM errors + // we will just move on and try the rest. + SCAN_COMM_REGISTER_CLASS * reg = nullptr; + + // Mask off attentions from the OMIDLFIR in the OMIC based on the + // OMI position. 0-19, 20-39, 40-59 + reg = omicChip->getRegister( "OMIDLFIR_MASK_OR" ); + reg->SetBitFieldJustified( (omiPosRelOmic * 20), 20, 0xfffff ); + reg->Write(); + + // Mask off attentions from the DSTLFIR and USTLFIR in the MCC based on + // the OMI position. 
+ // DSTLFIR Generic Bits: 8,9,10,11,24,25,26,27 + uint64_t mask = 0x00f000f000000000ull; + if ( 0 == omiPosRelMcc ) + { + // DSTLFIR Subchannel A Bits: 0,1,2,3,12,14,16,18,20,22 + mask |= 0xf00aaa0000000000ull; + } + else + { + // DSTLFIR Subchannel B Bits: 4,5,6,7,13,15,17,19,21,23 + mask |= 0x0f05550000000000ull; + } + reg = mccChip->getRegister( "DSTLFIR_MASK_OR" ); + reg->SetBitFieldJustified( 0, 64, mask ); + reg->Write(); + + // USTLFIR Generic Bits: 6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21, + // 22,23,24,25,26,57,58,61,62,63 + mask = 0x03ffffe000000067ull; + if ( 0 == omiPosRelMcc ) + { + // USTLFIR Subchannel A Bits: 0,2,4,27,29,31,33,35,37,39,41,43,45, + // 47,49,51,53,55,59 + mask |= 0xa800001555555510ull; + } + else + { + // USTLFIR Subchannel B Bits: 1,3,5,28,30,32,34,36,38,40,42,44,46, + // 48,50,52,54,56,60 + mask |= 0x5400000aaaaaaa88ull; + } + reg = mccChip->getRegister( "USTLFIR_MASK_OR" ); + reg->SetBitFieldJustified( 0, 64, mask ); + reg->Write(); + + // Mask off all attentions from the chiplet FIRs in the OCMB + reg = ocmbChip->getRegister( "OCMB_CHIPLET_FIR_MASK" ); + reg->setAllBits(); // Blindly mask everything + reg->Write(); + + + // To ensure FSP ATTN doesn't think there is an active attention on this + // OCMB, manually clear the interrupt status register. + reg = ocmbChip->getRegister( "INTER_STATUS_REG" ); + reg->clearAllBits(); // Blindly clear everything + reg->Write(); + + // During runtime, send a dynamic memory deallocation message. + // During Memory Diagnostics, tell MDIA to stop pattern tests. 
+ #ifdef __HOSTBOOT_RUNTIME + MemDealloc::port<TYPE_OCMB_CHIP>( ocmbChip ); + #else + if ( isInMdiaMode() ) + { + mdiaSendEventMsg( ocmb, MDIA::STOP_TESTING ); + } + #endif + + #endif // Hostboot only + + }while(0); + + #undef PRDF_FUNC +} + +template<> +void cleanupChnlFail<TYPE_MCC>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MCC == i_chip->getType() ); + + for ( auto & omi : getConnected(i_chip->getTrgt(), TYPE_OMI) ) + { + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); + } +} + +template<> +void cleanupChnlFail<TYPE_OMIC>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OMIC == i_chip->getType() ); + + for ( auto & omi : getConnected(i_chip->getTrgt(), TYPE_OMI) ) + { + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); + } +} + +template<> +void cleanupChnlFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + TargetHandle_t omi = getConnectedParent( i_chip->getTrgt(), TYPE_OMI ); + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); +} + //------------------------------------------------------------------------------ uint64_t reverseBits( uint64_t i_val, uint64_t i_numBits ) diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H index 9759cd010..39a6051fe 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H @@ -102,12 +102,12 @@ int32_t collectCeStats( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Gets DRAM size for an MBA, MCA, or MEM_PORT. - * @param i_chip MBA, MCA, or MEM_PORT chip. + * @param i_trgt MBA, MCA, or MEM_PORT target. * @param i_dimmSlct DIMM select. Optional for MBA chip. 
* @return size for a DRAM */ template<TARGETING::TYPE T> -uint8_t getDramSize( ExtensibleChip * i_chip, uint8_t i_dimmSlct = 0 ); +uint8_t getDramSize( TARGETING::TargetHandle_t i_trgt, uint8_t i_dimmSlct = 0 ); /** * @brief determines the type of Centaur based raw card associated with MBA. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C b/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C index bb911847e..4cd596514 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2018 */ +/* Contributors Listed Below - COPYRIGHT 2013,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -70,42 +70,78 @@ MemoryMru::MemoryMru( uint32_t i_memMru ) : PRDF_ASSERT( false ); } - // If our target is MBA, get the chnlPos from the membuf - if ( 0 == iv_memMruMeld.s.isMca ) + // If our target is MCA + if ( 1 == iv_memMruMeld.s.isMca ) { - TargetHandle_t membuf = getConnectedChild( proc, TYPE_MEMBUF, + iv_target = getConnectedChild( proc, TYPE_MCA, iv_memMruMeld.s.chnlPos ); - if ( NULL == membuf ) + if ( NULL == iv_target ) { - PRDF_ERR( PRDF_FUNC "Could not find functional membuf " + PRDF_ERR( PRDF_FUNC "Could not find functional mca " "attached to proc 0x%08X at pos: %u", getHuid( proc ), iv_memMruMeld.s.chnlPos ); PRDF_ASSERT( false ); } + } + // If our target is OCMB + else if ( 1 == iv_memMruMeld.s.isOcmb ) + { + // chnlPos specifies the position of the MCC relative to the proc + TargetHandle_t mcc = getConnectedChild( proc, TYPE_MCC, + iv_memMruMeld.s.chnlPos ); + if ( nullptr == mcc ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional mcc attached to " + "proc 0x%08x at pos: %u", getHuid(proc), + iv_memMruMeld.s.chnlPos ); + PRDF_ASSERT( false ); + } - iv_target = getConnectedChild( membuf, TYPE_MBA, - iv_memMruMeld.s.mbaPos ); - if ( NULL == iv_target ) + // mbaPos specifies 
the position of the OMI relative to the MCC + TargetHandle_t omi = getConnectedChild( mcc, TYPE_OMI, + iv_memMruMeld.s.mbaPos ); + if ( nullptr == omi ) { - PRDF_ERR( PRDF_FUNC "Could not find functional mba attached " - "to 0x%08X at pos: %u", getHuid( membuf ), - iv_memMruMeld.s.mbaPos ); + PRDF_ERR( PRDF_FUNC "Could not find functional omi attached to " + "mcc 0x%08x at pos: %u", getHuid(mcc), + iv_memMruMeld.s.mbaPos ); + PRDF_ASSERT( false ); + } + + // There is only one OCMB attached per OMI + iv_target = getConnectedChild( omi, TYPE_OCMB_CHIP, 0 ); + if ( nullptr == iv_target ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional ocmb attached to " + "omi 0x%08x", getHuid(mcc) ); PRDF_ASSERT( false ); } + + } + // If our target is MBA, get the chnlPos from the membuf else { - iv_target = getConnectedChild( proc, TYPE_MCA, + TargetHandle_t membuf = getConnectedChild( proc, TYPE_MEMBUF, iv_memMruMeld.s.chnlPos ); - if ( NULL == iv_target ) + if ( nullptr == membuf ) { - PRDF_ERR( PRDF_FUNC "Could not find functional mca " + PRDF_ERR( PRDF_FUNC "Could not find functional membuf " "attached to proc 0x%08X at pos: %u", getHuid( proc ), iv_memMruMeld.s.chnlPos ); PRDF_ASSERT( false ); } - } + iv_target = getConnectedChild( membuf, TYPE_MBA, + iv_memMruMeld.s.mbaPos ); + if ( nullptr == iv_target ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional mba attached " + "to 0x%08X at pos: %u", getHuid( membuf ), + iv_memMruMeld.s.mbaPos ); + PRDF_ASSERT( false ); + } + } // Get the rank iv_rank = MemRank( iv_memMruMeld.s.mrank, iv_memMruMeld.s.srank ); @@ -247,7 +283,8 @@ TargetHandleList MemoryMru::getCalloutList() const } } } - else if ( TARGETING::TYPE_MCA == getTargetType(iv_target) ) + else if ( TARGETING::TYPE_MCA == getTargetType(iv_target) || + TARGETING::TYPE_OCMB_CHIP == getTargetType(iv_target) ) { if ( CALLOUT_ALL_MEM == iv_special ) { @@ -304,6 +341,11 @@ void MemoryMru::getCommonVars() { proc = getConnectedParent( iv_target, TYPE_PROC ); } + else if ( 
TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t mcc = getConnectedParent( iv_target, TYPE_MCC ); + proc = getConnectedParent( mcc, TYPE_PROC ); + } else { PRDF_ERR( PRDF_FUNC "Invalid target type" ); @@ -323,11 +365,27 @@ void MemoryMru::getCommonVars() } // If our target is an MCA, then chnlPos will specify the MCA position // and mbaPos will be an unused field - else + else if ( TYPE_MCA == getTargetType(iv_target) ) { iv_memMruMeld.s.isMca = 1; iv_memMruMeld.s.chnlPos = getTargetPosition( iv_target ); } + // If our target is an OCMB, then chnlPos will specify the MCC position and + // mbaPos will specify the OMI position. + else if ( TYPE_OCMB_CHIP == getTargetType(iv_target) ) + { + TargetHandle_t omi = getConnectedParent( iv_target, TYPE_OMI ); + TargetHandle_t mcc = getConnectedParent( omi, TYPE_MCC ); + + iv_memMruMeld.s.isOcmb = 1; + iv_memMruMeld.s.chnlPos = getTargetPosition(mcc) % MAX_MCC_PER_PROC; + iv_memMruMeld.s.mbaPos = getTargetPosition(omi) % MAX_OMI_PER_MCC; + } + else + { + PRDF_ERR( PRDF_FUNC "Invalid target type" ); + PRDF_ASSERT(false); + } iv_memMruMeld.s.nodePos = getTargetPosition( node ); iv_memMruMeld.s.procPos = getTargetPosition( proc ); diff --git a/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H new file mode 100644 index 000000000..75d7dd53e --- /dev/null +++ b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H @@ -0,0 +1,247 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __prdfOcmbDataBundle_H +#define __prdfOcmbDataBundle_H + +/** @file prdfOcmbDataBundle.H + * @brief Contains the data bundle for a P9 OCMB_CHIP object. + */ + +// Framework includes +#include <prdfExtensibleChip.H> + +// Platform includes +#include <prdfPlatServices.H> +#include <prdfMemCeTable.H> +#include <prdfMemUeTable.H> + +#ifdef __HOSTBOOT_MODULE + +#include <prdfMemScrubUtils.H> +#include <prdfMemTdFalseAlarm.H> +#include <prdfMemThresholds.H> +#include <prdfMemTdCtlr.H> + +#ifndef __HOSTBOOT_RUNTIME +#include <prdfMemIplCeStats.H> +#endif + +#endif // __HOSTBOOT_MODULE + +namespace PRDF +{ + +/** @brief P9 OCMB data bundle. */ +class OcmbDataBundle : public DataBundle +{ + public: // functions + + /** + * @brief Constructor. + * @param i_ocmbChip The OCMB chip. + */ + explicit OcmbDataBundle( ExtensibleChip * i_ocmbChip ) : + iv_chip(i_ocmbChip), iv_ceTable(i_ocmbChip), iv_ueTable(i_ocmbChip) + {} + + /** @brief Destructor. */ + ~OcmbDataBundle() + { + #ifdef __HOSTBOOT_MODULE + #ifdef __HOSTBOOT_RUNTIME + delete iv_vcmFalseAlarmCounter; + delete iv_tpsFalseAlarmCounter; + #else // IPL only + delete iv_iplCeStats; + #endif + delete iv_tdCtlr; iv_tdCtlr = nullptr; + #endif // __HOSTBOOT_MODULE + } + + // Don't allow copy or assignment. + OcmbDataBundle( const OcmbDataBundle & ) = delete; + const OcmbDataBundle & operator=( const OcmbDataBundle & ) = delete; + + #ifdef __HOSTBOOT_MODULE + + /** @return The Targeted Diagnostics controller. 
*/ + MemTdCtlr<TARGETING::TYPE_OCMB_CHIP> * getTdCtlr() + { + if ( nullptr == iv_tdCtlr ) + { + iv_tdCtlr = new MemTdCtlr<TARGETING::TYPE_OCMB_CHIP>{iv_chip}; + } + + return iv_tdCtlr; + } + + /** @return The IMPE threshold counter. */ + VcmFalseAlarm * getImpeThresholdCounter() + { + if ( nullptr == iv_impeThresholdCounter ) + { + iv_impeThresholdCounter = new VcmFalseAlarm( + TimeBasedThreshold { getImpeTh() } ); + } + + return iv_impeThresholdCounter; + } + + #ifdef __HOSTBOOT_RUNTIME + + /** @return The VCM false alarm counter. */ + VcmFalseAlarm * getVcmFalseAlarmCounter() + { + if ( nullptr == iv_vcmFalseAlarmCounter ) + { + iv_vcmFalseAlarmCounter = new VcmFalseAlarm( + TimeBasedThreshold { 4, ThresholdResolution::ONE_DAY } ); + } + + return iv_vcmFalseAlarmCounter; + } + + /** @return The TPS false alarm counter. */ + TpsFalseAlarm * getTpsFalseAlarmCounter() + { + if ( nullptr == iv_tpsFalseAlarmCounter ) + { + iv_tpsFalseAlarmCounter = new TpsFalseAlarm( + TimeBasedThreshold{ 3, ThresholdResolution::ONE_DAY } ); + } + + return iv_tpsFalseAlarmCounter; + } + + #else // IPL only + + /** @return The IPL CE statistics object. */ + MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * getIplCeStats() + { + if ( nullptr == iv_iplCeStats ) + { + iv_iplCeStats = + new MemIplCeStats<TARGETING::TYPE_OCMB_CHIP>( iv_chip ); + } + + return iv_iplCeStats; + } + + #endif + + #endif // __HOSTBOOT_MODULE + + private: // instance variables + + /** The OCMB chip associated with this data bundle. */ + ExtensibleChip * const iv_chip; + + #ifdef __HOSTBOOT_MODULE + + /** The Targeted Diagnostics controller. */ + MemTdCtlr<TARGETING::TYPE_OCMB_CHIP> * iv_tdCtlr = nullptr; + + /** IMPE threshold counter. 
*/ + VcmFalseAlarm * iv_impeThresholdCounter = nullptr; + + #endif // __HOSTBOOT_MODULE + + public: // instance variables + + MemCeTable<TARGETING::TYPE_OCMB_CHIP> iv_ceTable; ///< CE table for FFDC + MemUeTable iv_ueTable; ///< UE table for FFDC + + /** If there is a channel failure detected on this bus, there will be some + * required cleanup after analysis to mask off all further attentions from + * the bus. A channel failure could occur on either side of the bus and it + * is possible the cleanup function could be called in multiple + * PostAnalysis plugins depending on where the channel failure occurred. + * Since we only want to do one cleanup, we will use this variable to + * indicate if a cleanup is still required or has already been done. */ + bool iv_doChnlFailCleanup = false; + + #ifdef __HOSTBOOT_MODULE + + /** Threshold table for RCD parity errors. */ + TimeBasedThreshold iv_rcdParityTh = TimeBasedThreshold( getRcdParityTh() ); + + /** Threshold table for IUEs. Threshold per DIMM */ + std::map<uint8_t, TimeBasedThreshold> iv_iueTh; + + /** Bool to indicate if we've triggered a port fail because of IUEs. */ + bool iv_iuePortFail = false; + + #ifdef __HOSTBOOT_RUNTIME + + /** VCM false alarm counter. */ + VcmFalseAlarm * iv_vcmFalseAlarmCounter = nullptr; + + /** TPS false alarm counter. */ + TpsFalseAlarm * iv_tpsFalseAlarmCounter = nullptr; + + /** Set to true if mainline NCEs and TCEs should be permanently masked. This + * is checked at the end of targeted diagnostics before background + * scrubbing is resumed. */ + bool iv_maskMainlineNceTce = false; + + // These are used to limit the number of times a scrub command will stop + // on a UE or CE on a rank. This is to prevent potential flooding of + // maintenance UEs or CEs. The threshold will be 16 per rank for each. 
+ TimeBasedThreshold iv_ueStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS ); + TimeBasedThreshold iv_ceStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS );; + + // If we stop on a UE or a CE, we will need to store the rank that the + // error is on so that we can clear our respective thresholds if the + // next error we stop on is on a different rank. + MemRank iv_ceUeRank; + + #else // IPL only + + /** MNFG IPL CE statistics. */ + MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * iv_iplCeStats = nullptr; + + #endif + + #endif // __HOSTBOOT_MODULE + +}; + +/** + * @brief Wrapper function for the OcmbDataBundle. + * @param i_ocmbChip The OCMB chip. + * @return This OCMB's data bundle. + */ +inline OcmbDataBundle * getOcmbDataBundle( ExtensibleChip * i_ocmbChip ) +{ + return static_cast<OcmbDataBundle *>(i_ocmbChip->getDataBundle()); +} + +} // end namespace PRDF + +#endif // __prdfOcmbDataBundle_H + diff --git a/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk b/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk index 087214ece..2ea0712d3 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk +++ b/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -51,6 +51,7 @@ prd_obj += prdfMemoryMru.o prd_obj += prdfMemUeTable.o prd_obj += prdfMemUtils.o prd_obj += prdfMemThresholds.o +prd_obj += prdfP9OcmbChipDomain.o # rule plugin related prd_rule_plugin += prdfP9Mca_common.o diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule index eea254545..d1a6bc290 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule @@ -241,7 +241,7 @@ group gMCACALFIR /** MCACALFIR[0] * A MBA recoverable error has occurred. */ - (rMCACALFIR, bit(0)) ? self_th_1; + (rMCACALFIR, bit(0)) ? nvdimm_self_th_1; /** MCACALFIR[1] * MBA Nonrecoverable Error @@ -251,7 +251,7 @@ group gMCACALFIR /** MCACALFIR[2] * Excessive refreshes to a single rank. */ - (rMCACALFIR, bit(2)) ? self_th_32perDay; + (rMCACALFIR, bit(2)) ? nvdimm_self_th_32perDay; /** MCACALFIR[3] * Err detected in the MBA debug WAT logic @@ -266,7 +266,7 @@ group gMCACALFIR /** MCACALFIR[5] * Calibration complete indication xout */ - (rMCACALFIR, bit(5)) ? self_th_32perDay; + (rMCACALFIR, bit(5)) ? nvdimm_self_th_32perDay; /** MCACALFIR[6] * Emergency Throttle @@ -279,7 +279,7 @@ group gMCACALFIR (rMCACALFIR, bit(7)) ? self_th_1; /** MCACALFIR[8] - * event_n active on DDR interface + * Active NVDIMM Attention */ (rMCACALFIR, bit(8)) ? analyzeNvdimms; @@ -533,7 +533,7 @@ group gMCAECCFIR /** MCAECCFIR[42] * SCOM_PARITY_CLASS_RECOVERABLE */ - (rMCAECCFIR, bit(42)) ? self_th_1; + (rMCAECCFIR, bit(42)) ? nvdimm_self_th_1; /** MCAECCFIR[43] * SCOM_PARITY_CLASS_UNRECOVERABLE @@ -548,7 +548,7 @@ group gMCAECCFIR /** MCAECCFIR[45] * WRITE_RMW_CE */ - (rMCAECCFIR, bit(45)) ? self_th_32perDay; + (rMCAECCFIR, bit(45)) ? nvdimm_self_th_32perDay; /** MCAECCFIR[46] * WRITE_RMW_UE @@ -686,12 +686,12 @@ group gDDRPHYFIR /** DDRPHYFIR[60] * Register PE 4 bit impact */ - (rDDRPHYFIR, bit(60)) ? self_th_1; + (rDDRPHYFIR, bit(60)) ? 
nvdimm_self_th_1; /** DDRPHYFIR[61] * Register PE 1 bit impact */ - (rDDRPHYFIR, bit(61)) ? self_th_1; + (rDDRPHYFIR, bit(61)) ? nvdimm_self_th_1; }; diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule index da3a73f82..6d5ab9018 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule @@ -70,6 +70,7 @@ actionclass rcd_parity_error calloutSelfLowNoGard; # Self LOW # Thresholding done in plugin funccall("RcdParityError"); # Run TPS on TH for all MCA ranks + funccall("ClearNvdimmGardState"); # Clear gard for NVDIMMs }; /** Handle Mainline IUEs */ @@ -125,7 +126,7 @@ actionclass maintenance_iaue_handling /** MCA/UE algroithm, threshold 5 per day */ actionclass mca_ue_algorithm_th_5perDay { - calloutSelfMed; + try( funccall("CheckForNvdimms"), calloutSelfMed ); threshold5pday; funccall("mcaUeAlgorithm"); # must be called last }; @@ -133,12 +134,29 @@ actionclass mca_ue_algorithm_th_5perDay /** MCA/UE algroithm, threshold 1 */ actionclass mca_ue_algorithm_th_1 { - calloutSelfMed; + try( funccall("CheckForNvdimms"), calloutSelfMed ); threshold1; funccall("mcaUeAlgorithm"); # must be called last }; ################################################################################ +# NVDIMM callouts # +################################################################################ + +# Simple callouts that will avoid gard for NVDIMMs at IPL +actionclass nvdimm_self_th_1 +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold1; +}; + +actionclass nvdimm_self_th_32perDay +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold32pday; +}; + +################################################################################ # Analyze groups ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule 
b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule index 1f61719a7..0a3301e2a 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -599,7 +599,7 @@ group gMCBISTFIR /** MCBISTFIR[13] * SCOM_RECOVERABLE_REG_PE */ - (rMCBISTFIR, bit(13)) ? self_th_1; + (rMCBISTFIR, bit(13)) ? nvdimm_self_th_1; /** MCBISTFIR[14] * SCOM_FATAL_REG_PE diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule index 9b2127f3f..11d499e30 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -36,6 +36,17 @@ actionclass command_addr_timeout funccall("commandAddrTimeout"); }; +################################################################################ +# NVDIMM callouts # +################################################################################ + +# Simple callouts that will avoid gard for NVDIMMs at IPL +actionclass nvdimm_self_th_1 +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold1; +}; + ############################################################################### # Analyze groups ############################################################################### diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule index 71a0342ab..987d68afb 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -148,7 +148,7 @@ group gMCFIR /** MCFIR[0] * mc internal recoverable eror */ - (rMCFIR, bit(0)) ? self_th_1; + (rMCFIR, bit(0)) ? nvdimm_self_th_1; /** MCFIR[1] * mc internal non recovervable error diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule index 1497cdccb..35339ccc6 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -24,6 +24,17 @@ # IBM_PROLOG_END_TAG ################################################################################ +# NVDIMM callouts # +################################################################################ + +# Simple callouts that will avoid gard for NVDIMMs at IPL +actionclass nvdimm_self_th_1 +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold1; +}; + +################################################################################ # Analyze groups ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule index a4ce0d02d..790537acf 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule @@ -469,12 +469,12 @@ group gIOOLFIR /** IOOLFIR[8] * link0 nak received */ - (rIOOLFIR, bit(8)) ? defaultMaskedError; + (rIOOLFIR, bit(8)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[9] * link1 nak received */ - (rIOOLFIR, bit(9)) ? defaultMaskedError; + (rIOOLFIR, bit(9)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[10] * link0 replay buffer full @@ -499,22 +499,22 @@ group gIOOLFIR /** IOOLFIR[14] * link0 sl ecc correctable */ - (rIOOLFIR, bit(14)) ? threshold_and_mask_self; + (rIOOLFIR, bit(14)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[15] * link1 sl ecc correctable */ - (rIOOLFIR, bit(15)) ? threshold_and_mask_self; + (rIOOLFIR, bit(15)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[16] * link0 sl ecc ue */ - (rIOOLFIR, bit(16)) ? threshold_and_mask_self; + (rIOOLFIR, bit(16)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[17] * link1 sl ecc ue */ - (rIOOLFIR, bit(17)) ? threshold_and_mask_self; + (rIOOLFIR, bit(17)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[18] * link0 retrain threshold @@ -597,12 +597,12 @@ group gIOOLFIR (rIOOLFIR, bit(33)) ? 
defaultMaskedError; /** IOOLFIR[34] - * link0 num replay + * link0 num replay or no forward progress */ (rIOOLFIR, bit(34)) ? defaultMaskedError; /** IOOLFIR[35] - * link1 num replay + * link1 num replay or no forward progress */ (rIOOLFIR, bit(35)) ? defaultMaskedError; @@ -619,12 +619,12 @@ group gIOOLFIR /** IOOLFIR[38] * link0 prbs select error */ - (rIOOLFIR, bit(38)) ? threshold_and_mask_self; + (rIOOLFIR, bit(38)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[39] * link1 prbs select error */ - (rIOOLFIR, bit(39)) ? threshold_and_mask_self; + (rIOOLFIR, bit(39)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[40] * link0 tcomplete bad @@ -639,102 +639,102 @@ group gIOOLFIR /** IOOLFIR[42] * link0 no spare lane available */ - (rIOOLFIR, bit(42)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(42)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[43] * link1 no spare lane available */ - (rIOOLFIR, bit(43)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(43)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[44] - * link0 spare done + * link0 spare done or degraded mode */ - (rIOOLFIR, bit(44)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(44)) ? spare_lane_degraded_mode_L0; /** IOOLFIR[45] - * link1 spare done + * link1 spare done or degraded mode */ - (rIOOLFIR, bit(45)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(45)) ? spare_lane_degraded_mode_L1; /** IOOLFIR[46] * link0 too many crc errors */ - (rIOOLFIR, bit(46)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(46)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[47] * link1 too many crc errors */ - (rIOOLFIR, bit(47)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(47)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[48] - * link0 npu error + * link0 npu error or orx otx dlx errors */ (rIOOLFIR, bit(48)) ? threshold_and_mask_self; /** IOOLFIR[49] - * link1 npu error + * link1 npu error or orx otx dlx errors */ (rIOOLFIR, bit(49)) ? threshold_and_mask_self; /** IOOLFIR[50] * linkx npu error */ - (rIOOLFIR, bit(50)) ? threshold_and_mask_self; + (rIOOLFIR, bit(50)) ? 
threshold_and_mask_self_smp_only; /** IOOLFIR[51] * osc switch */ - (rIOOLFIR, bit(51)) ? threshold_and_mask_self; + (rIOOLFIR, bit(51)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[52] * link0 correctable array error */ - (rIOOLFIR, bit(52)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(52)) ? self_th_32perDay; /** IOOLFIR[53] * link1 correctable array error */ - (rIOOLFIR, bit(53)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(53)) ? self_th_32perDay; /** IOOLFIR[54] * link0 uncorrectable array error */ - (rIOOLFIR, bit(54)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(54)) ? self_th_1; /** IOOLFIR[55] * link1 uncorrectable array error */ - (rIOOLFIR, bit(55)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(55)) ? self_th_1; /** IOOLFIR[56] * link0 training failed */ - (rIOOLFIR, bit(56)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(56)) ? training_failure_L0; /** IOOLFIR[57] * link1 training failed */ - (rIOOLFIR, bit(57)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(57)) ? training_failure_L1; /** IOOLFIR[58] * link0 unrecoverable error */ - (rIOOLFIR, bit(58)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(58)) ? unrecoverable_error_L0; /** IOOLFIR[59] * link1 unrecoverable error */ - (rIOOLFIR, bit(59)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(59)) ? unrecoverable_error_L1; /** IOOLFIR[60] * link0 internal error */ - (rIOOLFIR, bit(60)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(60)) ? internal_error_L0; /** IOOLFIR[61] * link1 internal error */ - (rIOOLFIR, bit(61)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(61)) ? 
internal_error_L1; /** IOOLFIR[62] * fir scom err dup diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule index 6ac3bc5a1..6712a5977 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -2872,7 +2872,7 @@ group gNXCQFIR /** NXCQFIR[19] * Uncorrectable error on ERAT arrays */ - (rNXCQFIR, bit(19)) ? nx_th_32perDay; + (rNXCQFIR, bit(19)) ? nx_th_1; /** NXCQFIR[20] * SUE on ERAT arrays diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule index 826308710..1960da53b 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -23,9 +23,15 @@ # # IBM_PROLOG_END_TAG -############################################################################### +################################################################################ +# Analyze +################################################################################ + +actionclass analyzeENHCAFIR { analyze(gENHCAFIR); }; + +################################################################################ # Analyze connected -############################################################################### +################################################################################ actionclass analyzeConnectedMCBIST0 { analyze(connected(TYPE_MCBIST, 0)); }; actionclass analyzeConnectedMCBIST1 { analyze(connected(TYPE_MCBIST, 1)); }; diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule index 669d3e5b5..2e7e32869 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -208,6 +208,12 @@ actionclass parent_proc_th_1 threshold1; }; +actionclass parent_proc_th_32perDay +{ + callout(connected(TYPE_PROC), MRU_MED); + threshold32pday; +}; + actionclass level2_M_proc_L_th_1 { callout2ndLvlMed; @@ -273,4 +279,3 @@ actionclass chip_to_chip calloutSelfMed; threshold1; }; - diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule index 6590bb122..700e87649 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -88,6 +88,150 @@ actionclass obusSmpFailure_L1 threshold1; }; +actionclass smp_masked +{ + # If SMP mode, does defaultMaskedError action and returns SUCCESS. + # Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + funccall( "smp_masked" ); # If SMP mode +}; + +actionclass non_smp_masked +{ + # If NOT in SMP mode, does defaultMaskedError action and returns SUCCESS. + # Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + funccall( "non_smp_masked" ); +}; + +actionclass non_smp_callout_bus_th_1 +{ + # NOTE: We cannot put the threshold action in this actionclass because it + # will affect the SMP action in the try() statement. Therefore, the + # plugin must handle the thresholding if in non-SMP mode. 
+ + # If NOT in SMP mode: + # - calls out this OBUS + # - indicates the problem may be somewhere between this OBUS and whatever + # is on the other side (which we know nothing about) + # - sets threshold + # - sets service call + # - returns SUCCESS + # Otherwise + # - returns PRD_SCAN_COMM_REGISTER_ZERO + funccall( "non_smp_callout_bus_th_1" ); +}; + +actionclass non_smp_callout_lvl2_th_1 +{ + # NOTE: We cannot put the threshold action in this actionclass because it + # will affect the SMP action in the try() statement. Therefore, the + # plugin must handle the thresholding if in non-SMP mode. + + # If NOT in SMP mode: + # - calls out level 2 support + # - sets threshold + # - sets service call + # - returns SUCCESS + # Otherwise + # - returns PRD_SCAN_COMM_REGISTER_ZERO + funccall( "non_smp_callout_lvl2_th_1" ); +}; + +actionclass non_smp_callout_self_th_32perDay +{ + threshold32pday; # This is ok because it is greater than threshold1. + + # If NOT in SMP mode: + # - calls out this OBUS + # - returns SUCCESS + # Otherwise + # - returns PRD_SCAN_COMM_REGISTER_ZERO + funccall( "non_smp_callout_self" ); +}; + +actionclass threshold_and_mask_self_non_smp_only +{ + # SMP: masked + # Non-SMP: threshold_and_mask_self + try ( smp_masked, threshold_and_mask_self ); +}; + +actionclass threshold_and_mask_self_smp_only +{ + # SMP: threshold_and_mask_self + # Non-SMP: masked + try ( non_smp_masked, threshold_and_mask_self ); +}; + +actionclass obusSmpCallout_L0_smp_only +{ + # SMP: obusSmpCallout_L0 + # Non-SMP: masked + try ( non_smp_masked, obusSmpCallout_L0 ); +}; + +actionclass obusSmpCallout_L1_smp_only +{ + # SMP: obusSmpCallout_L1 + # Non-SMP: masked + try ( non_smp_masked, obusSmpCallout_L1 ); +}; + +actionclass spare_lane_degraded_mode_L0 +{ + # SMP: obusSmpCallout_th32_L0 (lane spare) + # Non-SMP: non_smp_callout_bus_th_1 (degraded mode) + try ( non_smp_callout_bus_th_1, obusSmpCallout_th32_L0 ); +}; + +actionclass spare_lane_degraded_mode_L1 +{ + # SMP: 
obusSmpCallout_th32_L1 (lane spare) + # Non-SMP: non_smp_callout_bus_th_1 (degraded mode) + try ( non_smp_callout_bus_th_1, obusSmpCallout_th32_L1 ); +}; + +actionclass training_failure_L0 +{ + # SMP: obusSmpFailure_L0 + # Non-SMP: non_smp_callout_lvl2_th_1 + try ( non_smp_callout_lvl2_th_1, obusSmpFailure_L0 ); +}; + +actionclass training_failure_L1 +{ + # SMP: obusSmpFailure_L1 + # Non-SMP: non_smp_callout_lvl2_th_1 + try ( non_smp_callout_lvl2_th_1, obusSmpFailure_L1 ); +}; + +actionclass unrecoverable_error_L0 +{ + # SMP: obusSmpFailure_L0 + # Non-SMP: non_smp_callout_bus_th_1 + try ( non_smp_callout_bus_th_1, obusSmpFailure_L0 ); +}; + +actionclass unrecoverable_error_L1 +{ + # SMP: obusSmpFailure_L1 + # Non-SMP: non_smp_callout_bus_th_1 + try ( non_smp_callout_bus_th_1, obusSmpFailure_L1 ); +}; + +actionclass internal_error_L0 +{ + # SMP: obusSmpFailure_L0 + # Non-SMP: non_smp_callout_self_th_32perDay + try ( non_smp_callout_self_th_32perDay, obusSmpFailure_L0 ); +}; + +actionclass internal_error_L1 +{ + # SMP: obusSmpFailure_L1 + # Non-SMP: non_smp_callout_self_th_32perDay + try ( non_smp_callout_self_th_32perDay, obusSmpFailure_L1 ); +}; + ############################################################################### # Analyze groups ############################################################################### diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule index 461fbc664..bc25fba5d 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -85,6 +85,13 @@ capture group default; }; + register MISC_ERROR_STATUS + { + name "P9 OBUS target Misc Error Status register"; + scomaddr 0x09010829; + capture group default; + }; + ############################################################################ # P9 OBUS targets for cable FFDC # One additional reg (IOOLFIR) is in default group diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule index aacf978bd..e5700c34b 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2017,2018 +# Contributors Listed Below - COPYRIGHT 2017,2019 # [+] International Business Machines Corp. # # @@ -670,7 +670,6 @@ actionclass analyzePBIOOFIR { analyze(gPBIOOFIR ); }; actionclass analyzePBAFIR { analyze(gPBAFIR ); }; actionclass analyzePSIHBFIR { analyze(gPSIHBFIR ); }; actionclass analyzePBAMFIR { analyze(gPBAMFIR ); }; -actionclass analyzeENHCAFIR { analyze(gENHCAFIR ); }; actionclass analyzeXB_LFIR { analyze(gXB_LFIR ); }; actionclass analyzeXBPPEFIR { analyze(gXBPPEFIR ); }; diff --git a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C index ece3fc1a8..730f99f09 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C @@ -127,6 +127,88 @@ PRDF_PLUGIN_DEFINE_NS(nimbus_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo PRDF_PLUGIN_DEFINE_NS(cumulus_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo); PRDF_PLUGIN_DEFINE_NS(axone_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo); +/** + * @brief Will change the gard state of any NVDIMMs in the callout list to + * NO_GARD. + * @param i_chip The chip. + * @param io_sc The step code data struct. 
+ * @returns SUCCESS + */ +int32_t ClearNvdimmGardState( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #ifdef __HOSTBOOT_MODULE + + // Call the sdc to clear the NVDIMM mru list. + io_sc.service_data->clearNvdimmMruListGard(); + + #endif + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE_NS(nimbus_mca, CommonPlugins, ClearNvdimmGardState); + +/** + * @brief Will check if any of the DIMMs connected to this chip are NVDIMMs + * and send a message to PHYP/Hostboot that save/restore may work. If + * we are at IPL, we will callout self no gard instead of garding. + * @param i_chip The chip of the DIMM parent. + * @param io_sc The step code data struct. + * @returns SUCCESS if NVDIMMs found at IPL, PRD_SCAN_COMM_REGISTER_ZERO if not. + */ +int32_t CheckForNvdimms( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + int32_t rc = PRD_SCAN_COMM_REGISTER_ZERO; + + #ifdef CONFIG_NVDIMM + #ifdef __HOSTBOOT_MODULE + + TargetHandleList dimmList = getConnected( i_chip->getTrgt(), TYPE_DIMM ); + + // Always loop through all the dimms so we send the + // nvdimmNotifyProtChange message for all the NVDIMMs on the target. + for ( auto & dimm : dimmList ) + { + // If the callout target is an NVDIMM send a message to + // PHYP/Hostboot that a save/restore may work, and if we are at + // IPL, do not gard the target. + if ( isNVDIMM(dimm) ) + { + // Send the message to PHYP/Hostboot + uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( dimm, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) + { + PRDF_TRAC( "CheckForNvdimms: nvdimmNotifyProtChange(0x%08x)" + " failed.", PlatServices::getHuid(dimm) ); + continue; + } + + #ifndef __HOSTBOOT_RUNTIME + // IPL + // We will callout self, no gard. No need for another self callout + // from the rule code, so return SUCCESS. 
+ rc = SUCCESS; + #endif + } + } + + if ( SUCCESS == rc ) + { + // Callout self, no gard + io_sc.service_data->SetCallout( i_chip->getTrgt(), MRU_MED, NO_GARD ); + } + + #endif // __HOSTBOOT_MODULE + #endif // CONFIG_NVDIMM + + return rc; +} +PRDF_PLUGIN_DEFINE_NS(nimbus_mcs, CommonPlugins, CheckForNvdimms); +PRDF_PLUGIN_DEFINE_NS(nimbus_mca, CommonPlugins, CheckForNvdimms); +PRDF_PLUGIN_DEFINE_NS(nimbus_mcbist, CommonPlugins, CheckForNvdimms); + } // namespace CommonPlugins ends }// namespace PRDF ends diff --git a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C index 6cb4e6535..6ad889fd5 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C @@ -75,6 +75,16 @@ TargetHandle_t getTxBusEndPt( TargetHandle_t i_rxTrgt) // grab connected DMI parent o_txTrgt = getConnectedParent( i_rxTrgt, TYPE_DMI ); } + else if ( TYPE_OMI == busType ) + { + // Get connected child OCMB (one OCMB per OMI) + o_txTrgt = getConnectedChild( i_rxTrgt, TYPE_OCMB_CHIP, 0 ); + } + else if ( TYPE_OCMB_CHIP == busType ) + { + // Get connected parent OMI + o_txTrgt = getConnectedParent( i_rxTrgt, TYPE_OMI ); + } PRDF_ASSERT(nullptr != o_txTrgt); return o_txTrgt; @@ -310,38 +320,6 @@ int32_t __handleLaneRepairEvent( ExtensibleChip * i_chip, #undef PRDF_FUNC } -template<> -int32_t __handleLaneRepairEvent<TYPE_OBUS, TYPE_OBUS>( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc, - bool i_spareDeployed ) -{ - TargetHandle_t rxBusTgt = i_chip->getTrgt(); - - // Make predictive on first occurrence in MFG - if ( isLaneRepairDisabled<TYPE_OBUS>() ) - { - i_sc.service_data->setServiceCall(); - } - - // RTC 174485 - // Need HWPs for this. Just callout bus interface for now. 
- if ( obusInSmpMode(rxBusTgt) ) - { - calloutBusInterface( i_chip, i_sc, MRU_LOW ); - i_sc.service_data->setServiceCall(); - } - else - { - PRDF_ERR( "__handleLaneRepairEvent: Lane repair only supported " - "in SMP mode obus: 0x%08x", getHuid(rxBusTgt) ); - i_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); - i_sc.service_data->SetCallout( SP_CODE, MRU_MED, NO_GARD ); - i_sc.service_data->setServiceCall(); - } - return SUCCESS; -} - - int32_t handleLaneRepairEvent( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & i_sc, bool i_spareDeployed ) @@ -350,10 +328,6 @@ int32_t handleLaneRepairEvent( ExtensibleChip * i_chip, TYPE trgtType = getTargetType(i_chip->getTrgt()); switch (trgtType) { - case TYPE_OBUS: - rc = __handleLaneRepairEvent<TYPE_OBUS,TYPE_OBUS>( i_chip, i_sc, - i_spareDeployed ); - break; case TYPE_XBUS: rc = __handleLaneRepairEvent<TYPE_XBUS,TYPE_XBUS>( i_chip, i_sc, i_spareDeployed ); @@ -729,6 +703,8 @@ void obus_clearMaskFail( errlHndl_t &io_errl, TargetHandle_t &i_rxTrgt, PRDF_ASSERT( NULL != i_txTrgt ); PRDF_ASSERT( NULL != io_errl ); +#ifdef __HOSTBOOT_MODULE // register writes not allowed on FSP + uint32_t l_rc = SUCCESS; ExtensibleChip *l_rxChip = (ExtensibleChip *)systemPtr->GetChip( i_rxTrgt ); @@ -790,6 +766,8 @@ void obus_clearMaskFail( errlHndl_t &io_errl, TargetHandle_t &i_rxTrgt, } while (0); +#endif // __HOSTBOOT_MODULE + } // end obus_clearMaskFail @@ -924,7 +902,7 @@ PRDF_PLUGIN_DEFINE_NS( cumulus_proc, LaneRepair, captureSmpObus3 ); PRDF_PLUGIN_DEFINE_NS( nimbus_proc, LaneRepair, captureSmpObus3 ); PRDF_PLUGIN_DEFINE_NS( axone_proc, LaneRepair, captureSmpObus3 ); -int32_t calloutBusInterface( ExtensibleChip * i_chip, +int32_t calloutBusInterface( TargetHandle_t i_rxTrgt, STEP_CODE_DATA_STRUCT & i_sc, PRDpriority i_priority ) { @@ -934,10 +912,9 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, do { // Get both endpoints - TargetHandle_t rxTrgt = i_chip->getTrgt(); - TYPE rxType = getTargetType(rxTrgt); + TYPE 
rxType = getTargetType(i_rxTrgt); - if ( rxType == TYPE_OBUS && !obusInSmpMode( rxTrgt ) ) + if ( rxType == TYPE_OBUS && !obusInSmpMode( i_rxTrgt ) ) { // There is no support in hostboot for calling out the other end of // an NV or openCAPI bus. By design, any FIR bits associated with @@ -945,7 +922,7 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, // action. So if we hit this case, just make a default callout. PRDF_ERR( PRDF_FUNC "Lane repair only supported in SMP mode " - "obus: 0x%08x", getHuid(rxTrgt) ); + "obus: 0x%08x", getHuid(i_rxTrgt) ); i_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); i_sc.service_data->SetCallout( SP_CODE, MRU_MED, NO_GARD ); @@ -953,11 +930,11 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, break; } - TargetHandle_t txTrgt = getTxBusEndPt(rxTrgt); + TargetHandle_t txTrgt = getTxBusEndPt(i_rxTrgt); TYPE txType = getTargetType(txTrgt); // Add the endpoint target callouts - i_sc.service_data->SetCallout( rxTrgt, MRU_MEDA ); + i_sc.service_data->SetCallout( i_rxTrgt, MRU_MEDA ); i_sc.service_data->SetCallout( txTrgt, MRU_MEDA); // Get the HWAS bus type. @@ -975,6 +952,11 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, { hwasType = HWAS::DMI_BUS_TYPE; } + else if ( (TYPE_OMI == rxType && TYPE_OCMB_CHIP == txType) || + (TYPE_OCMB_CHIP == rxType && TYPE_OMI == txType) ) + { + hwasType = HWAS::OMI_BUS_TYPE; + } else { PRDF_ASSERT( false ); @@ -990,7 +972,7 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, } // Callout this bus interface. 
- PRDF_ADD_BUS_CALLOUT( errl, rxTrgt, txTrgt, hwasType, i_priority ); + PRDF_ADD_BUS_CALLOUT( errl, i_rxTrgt, txTrgt, hwasType, i_priority ); } while(0); @@ -1020,9 +1002,6 @@ int32_t spareDeployed( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, spareDeployed ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, spareDeployed ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, spareDeployed ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, spareDeployed ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, spareDeployed ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, spareDeployed ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, spareDeployed ); /** @@ -1042,9 +1021,6 @@ int32_t maxSparesExceeded( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, maxSparesExceeded ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, maxSparesExceeded ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, maxSparesExceeded ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, maxSparesExceeded ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, maxSparesExceeded ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, maxSparesExceeded ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, maxSparesExceeded ); /** @@ -1064,9 +1040,6 @@ int32_t tooManyBusErrors( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, tooManyBusErrors ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, tooManyBusErrors ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, tooManyBusErrors ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, tooManyBusErrors ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, tooManyBusErrors ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, tooManyBusErrors ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, tooManyBusErrors ); /** @@ -1078,18 +1051,53 @@ PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, tooManyBusErrors ); int32_t calloutBusInterfacePlugin( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc 
) { - calloutBusInterface(i_chip, io_sc, MRU_LOW); + calloutBusInterface(i_chip->getTrgt(), io_sc, MRU_LOW); return SUCCESS; } PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, calloutBusInterfacePlugin ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, calloutBusInterfacePlugin ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, calloutBusInterfacePlugin ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, calloutBusInterfacePlugin ); +PRDF_PLUGIN_DEFINE_NS( explorer_ocmb, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( cumulus_dmi, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, calloutBusInterfacePlugin ); +/** + * @brief Add callouts for a BUS interface inputting an OMIC or MCC target + * @param i_chip OMIC/MCC chip + * @param io_sc Step code data struct. + * @param i_pos The position of the OMI relative to the OMIC/MCC. 
+ * @return SUCCESS always + */ + +int32_t omiParentCalloutBusInterfacePlugin( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc, + uint8_t i_pos ) +{ + TargetHandle_t omi = getConnectedChild(i_chip->getTrgt(), TYPE_OMI, i_pos); + TargetHandle_t ocmb = getConnectedChild( omi, TYPE_OCMB_CHIP, 0 ); + + // Callout both ends of the bus as well (OMI and OCMB) + io_sc.service_data->SetCallout( omi, MRU_MEDA ); + io_sc.service_data->SetCallout( ocmb, MRU_MEDA ); + + calloutBusInterface(omi, io_sc, MRU_LOW); + return SUCCESS; +} + +#define OMI_PARENT_CALL_BUS_PLUGIN( POS ) \ +int32_t omiParentCalloutBusInterfacePlugin_##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + return omiParentCalloutBusInterfacePlugin( i_chip, io_sc, POS ); \ +} \ +PRDF_PLUGIN_DEFINE_NS( axone_omic, LaneRepair, \ + omiParentCalloutBusInterfacePlugin_##POS );\ +PRDF_PLUGIN_DEFINE_NS( axone_mcc, LaneRepair, \ + omiParentCalloutBusInterfacePlugin_##POS ); + +OMI_PARENT_CALL_BUS_PLUGIN( 0 ); +OMI_PARENT_CALL_BUS_PLUGIN( 1 ); +OMI_PARENT_CALL_BUS_PLUGIN( 2 ); //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H index afc834e29..3f5a3f33c 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H +++ b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2017 */ +/* Contributors Listed Below - COPYRIGHT 2017,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -56,12 +56,12 @@ int32_t handleLaneRepairEvent (ExtensibleChip * i_chip, /** * @brief Will add target bus interface endpoints and all parts in between the * endpoints to the global error log in RasServices. - * @param i_chip RX-side chip of bus interface - * @param i_sc The step code data struct. 
+ * @param i_rxTrgt RX-side target of bus interface + * @param i_sc The step code data struct. * @param i_priority Callout priority (default MRU_LOW). * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ -int32_t calloutBusInterface( ExtensibleChip * i_chip, +int32_t calloutBusInterface( TARGETING::TargetHandle_t i_rxTrgt, STEP_CODE_DATA_STRUCT & i_sc, PRDpriority i_priority = MRU_LOW ); diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C b/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C index e37cffcd3..7c3033dc2 100755 --- a/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C @@ -243,7 +243,7 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, { errlHndl_t errl = nullptr; - std::map<TARGETING::MODEL, std::map<TARGETING::TYPE, const char *>> fnMap = + std::map<uint32_t, std::map<TARGETING::TYPE, const char *>> fnMap = { { MODEL_NIMBUS, { { TYPE_PROC, nimbus_proc }, { TYPE_EQ, nimbus_eq }, @@ -285,7 +285,14 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, { TYPE_MI, axone_mi }, { TYPE_MCC, axone_mcc }, { TYPE_OMIC, axone_omic }, } }, - { MODEL_EXPLORER, { { TYPE_OCMB_CHIP, explorer_ocmb }, } }, + #ifdef __HOSTBOOT_MODULE + { POWER_CHIPID::EXPLORER_16, { { TYPE_OCMB_CHIP, explorer_ocmb }, } }, + #endif + // OCMB is not supported on FSP, however we need support here for the + // MODEL_OCMB model for our simulator to work. + #ifdef ESW_SIM_COMPILE + { MODEL_OCMB, { { TYPE_OCMB_CHIP, explorer_ocmb }, } }, + #endif }; // Get references to factory objects. @@ -299,7 +306,19 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, // Iterate all the targets for this type and add to given domain. 
for ( const auto & trgt : getFunctionalTargetList(i_type) ) { - TARGETING::MODEL model = getChipModel( trgt ); + uint32_t model = getChipModel( trgt ); + + #ifdef __HOSTBOOT_MODULE + // Special case for OCMBs (hostboot only issue for P9). + if ( MODEL_OCMB == model ) + { + // Use the chip ID instead of model. + model = getChipId( trgt ); + + // Skip Gemini OCMBs. They can exist, but PRD won't support them. + if ( POWER_CHIPID::GEMINI_16 == model ) continue; + } + #endif // Ensure this model is supported. if ( fnMap.end() == fnMap.find(model) ) @@ -350,8 +369,6 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, scanFac, resFac ); break; - // TODO RTC 199020 - add the pll domains for axone - default: ; } } diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C b/src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C new file mode 100644 index 000000000..6117c6edc --- /dev/null +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C @@ -0,0 +1,193 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace obus +{ + +//############################################################################## +// +// IOOLFIR +// +//############################################################################## + +/** + * @brief If OBUS is in SMP mode, does defaultMaskedError actions and returns + * SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t smp_masked( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: This attention should be masked. + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } + else + { + // Non-SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, smp_masked ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, smp_masked ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, smp_masked ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, does defaultMaskedError actions and + * returns SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_masked( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: This attention should be masked. 
+ io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_masked ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_masked ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_masked ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, calls out this bus on first occurrence and + * returns SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_callout_bus_th_1( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: Callout this bus. Note that Hostboot does not know what + // is on the other side of this bus and does not have any control over + // garding/deconfiguring. Therefore, we cannot gard since we will never + // know if the other side of the bus has been replaced. Also, there is + // a small probability that the fault could be between the two + // endpoints. Usually, we would do a procedure callout or call some HWP + // that would take care of the "everything in between" scenario. + // However, there is no existing mechanism. For now callout level 2 + // support at low priority. 
+ io_sc.service_data->SetCallout( i_chip->getTrgt(), MRU_MED, NO_GARD ); + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_LOW, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_callout_bus_th_1 ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_callout_bus_th_1 ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_callout_bus_th_1 ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, calls out level 2 support on first + * occurrence and returns SUCCESS. Otherwise, returns + * PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_callout_lvl2_th_1( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: Callout this bus on first occurrence. + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_callout_lvl2_th_1 ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_callout_lvl2_th_1 ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_callout_lvl2_th_1 ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, calls out this OBUS target and returns + * SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_callout_self( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. 
+ return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: Callout this OBUS target. + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_callout_self ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_callout_self ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_callout_self ); + +} // end namespace obus + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C new file mode 100644 index 000000000..2f6c25646 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C @@ -0,0 +1,78 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + +/** + * @file prdfP9OcmbChipDomain.C + * @brief chip Plug-in code for OCMB domain + */ + +#include <prdfP9OcmbChipDomain.H> + +// Framework includes +#include <prdfExtensibleChip.H> +#include <prdfPlatServices.H> +#include <prdfTrace.H> +#include <prdfOcmbDataBundle.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +#ifdef __HOSTBOOT_RUNTIME +void OcmbChipDomain::handleRrFo() +{ + #define PRDF_FUNC "[OcmbChipDomain::handleRrFo] " + + do + { + uint32_t domainSize = GetSize(); + // Iterate all OCMBs in the domain. + for ( uint32_t i = 0; i < domainSize; ++i ) + { + RuleChip * ocmbChip = LookUp(i); + + // Start background scrub if required. + OcmbDataBundle * ocmbdb = getOcmbDataBundle( ocmbChip ); + int32_t l_rc = ocmbdb->getTdCtlr()->handleRrFo(); + if ( SUCCESS != l_rc ) + { + // Let us not fail here. If problem is contained within an OCMB + // we will discover it again during normal TD procedures. + PRDF_ERR( PRDF_FUNC "handleRrFo() failed: OCMB=0x%08x", + ocmbChip->GetId() ); + continue; // Keep going. + } + } + + } while (0); + + #undef PRDF_FUNC +} +#endif + +} // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H index 5546d9453..9f5776cac 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -54,6 +54,16 @@ class OcmbChipDomain : public RuleChipDomain virtual bool Query( ATTENTION_TYPE i_attnType ) { return false; } + #ifdef __HOSTBOOT_RUNTIME + + /** + * @brief Starts memory background scrubbing or VCM procedure for OCMB + * during R/R and F/O if required. 
+ */ + void handleRrFo(); + + #endif + }; } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk b/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk index cb69cad14..64092650f 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk +++ b/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -56,4 +56,5 @@ prd_rule_plugin += prdfP9Eq.o prd_rule_plugin += prdfP9TodPlugins.o prd_rule_plugin += prdfP9Dmi_common.o prd_rule_plugin += prdfP9Mc_common.o +prd_rule_plugin += prdfP9Obus.o diff --git a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C index f99427d61..5cabaedc8 100644 --- a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C +++ b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C @@ -48,7 +48,6 @@ #include <p9_io_xbus_pdwn_lanes.H> #include <p9_io_xbus_clear_firs.H> #include <p9_io_erepairAccessorHwpFuncs.H> -#include <config.h> #include <p9_io_cen_read_erepair.H> #include <p9_io_cen_pdwn_lanes.H> #include <p9_io_dmi_read_erepair.H> @@ -695,6 +694,10 @@ uint32_t getBadDqBitmap( TargetHandle_t i_trgt, const MemRank & i_rank, o_rc = __getBadDqBitmap<fapi2::TARGET_TYPE_MEM_PORT>( i_trgt, i_rank, o_bitmap ); break; + case TYPE_OCMB_CHIP: + o_rc = __getBadDqBitmap<fapi2::TARGET_TYPE_OCMB_CHIP>( i_trgt, + i_rank, o_bitmap ); + break; default: PRDF_ERR( PRDF_FUNC "Invalid trgt type" ); o_rc = FAIL; @@ -777,6 +780,10 @@ uint32_t setBadDqBitmap( TargetHandle_t i_trgt, const MemRank & i_rank, o_rc = __setBadDqBitmap<fapi2::TARGET_TYPE_MEM_PORT>( i_trgt, i_rank, i_bitmap ); break; + case TYPE_OCMB_CHIP: + o_rc = __setBadDqBitmap<fapi2::TARGET_TYPE_OCMB_CHIP>( i_trgt, + i_rank, i_bitmap ); + break; default: PRDF_ERR( PRDF_FUNC "Invalid trgt type" ); o_rc = FAIL; @@ 
-872,6 +879,17 @@ void getDimmDqAttr<TYPE_MEM_PORT>( TargetHandle_t i_target, } // end function getDimmDqAttr template<> +void getDimmDqAttr<TYPE_OCMB_CHIP>( TargetHandle_t i_target, + uint8_t (&o_dqMapPtr)[DQS_PER_DIMM] ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_target) ); + + // TODO RTC 210072 - Support for multiple ports per OCMB + TargetHandle_t memPort = getConnectedChild( i_target, TYPE_MEM_PORT, 0 ); + getDimmDqAttr<TYPE_MEM_PORT>( memPort, o_dqMapPtr ); +} + +template<> void getDimmDqAttr<TYPE_DIMM>( TargetHandle_t i_target, uint8_t (&o_dqMapPtr)[DQS_PER_DIMM] ) { @@ -947,15 +965,15 @@ int32_t mssGetSteerMux<TYPE_MBA>( TargetHandle_t i_mba, const MemRank & i_rank, } template<> -int32_t mssGetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, - const MemRank & i_rank, - MemSymbol & o_port0Spare, - MemSymbol & o_port1Spare, - MemSymbol & o_eccSpare ) +int32_t mssGetSteerMux<TYPE_OCMB_CHIP>( TargetHandle_t i_ocmb, + const MemRank & i_rank, + MemSymbol & o_port0Spare, + MemSymbol & o_port1Spare, + MemSymbol & o_eccSpare ) { int32_t o_rc = SUCCESS; - /* TODO RTC 207273 - sparing support + /* TODO RTC 199032 - sparing support // called by FSP code so can't just move to hostboot side #ifdef __HOSTBOOT_MODULE @@ -963,7 +981,7 @@ int32_t mssGetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, uint8_t port0Spare, port1Spare, eccSpare; - fapi2::Target<fapi2::TARGET_TYPE_MEM_PORT> fapiPort(i_memPort); + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiPort(i_ocmb); FAPI_INVOKE_HWP( errl, mss_check_steering, fapiPort, i_rank.getMaster(), port0Spare, port1Spare, eccSpare ); @@ -971,15 +989,15 @@ int32_t mssGetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, { PRDF_ERR( "[PlatServices::mssGetSteerMux] mss_check_steering() " "failed. 
HUID: 0x%08x rank: %d", - getHuid(i_memPort), i_rank.getMaster() ); + getHuid(i_ocmb), i_rank.getMaster() ); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; } else { - o_port0Spare = MemSymbol::fromSymbol( i_memPort, i_rank, port0Spare ); - o_port1Spare = MemSymbol::fromSymbol( i_memPort, i_rank, port1Spare ); - o_eccSpare = MemSymbol::fromSymbol( i_memPort, i_rank, eccSpare ); + o_port0Spare = MemSymbol::fromSymbol( i_ocmb, i_rank, port0Spare ); + o_port1Spare = MemSymbol::fromSymbol( i_ocmb, i_rank, port1Spare ); + o_eccSpare = MemSymbol::fromSymbol( i_ocmb, i_rank, eccSpare ); } #endif */ @@ -1020,20 +1038,22 @@ int32_t mssSetSteerMux<TYPE_MBA>( TargetHandle_t i_mba, const MemRank & i_rank, } template<> -int32_t mssSetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, +int32_t mssSetSteerMux<TYPE_OCMB_CHIP>( TargetHandle_t i_memPort, const MemRank & i_rank, const MemSymbol & i_symbol, bool i_x4EccSpare ) { int32_t o_rc = SUCCESS; - /* TODO RTC 207273 - sparing support + /* TODO RTC 199032 - sparing support #ifdef __HOSTBOOT_MODULE errlHndl_t errl = NULL; fapi2::Target<fapi2::TARGET_TYPE_MEM_PORT> fapiPort(i_memPort); + TargetHandle_t dimm = getConnectedDimm( i_memPort, i_rank, + i_symbol.getPortSlct() ); uint8_t l_dramSymbol = PARSERUTILS::dram2Symbol<TYPE_MBA>( i_symbol.getDram(), - isDramWidthX4(i_memPort) ); + isDramWidthX4(dimm) ); FAPI_INVOKE_HWP( errl, mss_do_steering, fapiPort, i_rank.getMaster(), l_dramSymbol, @@ -1105,7 +1125,9 @@ int32_t getDimmSpareConfig<TYPE_MEM_PORT>( TargetHandle_t i_memPort, bool isFullByte = ( ENUM_ATTR_MEM_EFF_DIMM_SPARE_FULL_BYTE == o_spareConfig ); - bool isX4Dram = isDramWidthX4(i_memPort); + + TargetHandle_t dimm = getConnectedDimm( i_memPort, i_rank, i_ps ); + bool isX4Dram = isDramWidthX4(dimm); if ( ( isX4Dram && isFullByte ) || ( !isX4Dram && !isFullByte ) ) { @@ -1122,6 +1144,15 @@ int32_t getDimmSpareConfig<TYPE_MEM_PORT>( TargetHandle_t i_memPort, } template<> +int32_t getDimmSpareConfig<TYPE_OCMB_CHIP>( 
TargetHandle_t i_ocmb, + MemRank i_rank, uint8_t i_ps, uint8_t & o_spareConfig ) +{ + TargetHandle_t memPort = getConnectedChild( i_ocmb, TYPE_MEM_PORT, i_ps ); + return getDimmSpareConfig<TYPE_MEM_PORT>( memPort, i_rank, i_ps, + o_spareConfig ); +} + +template<> int32_t getDimmSpareConfig<TYPE_MBA>( TargetHandle_t i_mba, MemRank i_rank, uint8_t i_ps, uint8_t & o_spareConfig ) { @@ -1207,7 +1238,8 @@ uint32_t isDramSparingEnabled<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, do { - const bool isX4 = isDramWidthX4( i_trgt ); + TargetHandle_t dimm = getConnectedDimm( i_trgt, i_rank, i_ps ); + const bool isX4 = isDramWidthX4( dimm ); if ( isX4 ) { // Always an ECC spare in x4 mode. @@ -1216,9 +1248,7 @@ uint32_t isDramSparingEnabled<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, } // Check for any DRAM spares. - // TODO RTC 207273 - no TARGETING support for attr yet - //uint8_t cnfg = TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE; - uint8_t cnfg = 0; + uint8_t cnfg = TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE; o_rc = getDimmSpareConfig<TYPE_MEM_PORT>( i_trgt, i_rank, i_ps, cnfg ); if ( SUCCESS != o_rc ) { @@ -1226,9 +1256,7 @@ uint32_t isDramSparingEnabled<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, "failed", getHuid(i_trgt), i_rank.getKey(), i_ps ); break; } - // TODO RTC 207273 - no TARGETING support for attr yet - //o_spareEnable = (TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE; != cnfg); - o_spareEnable = (0 != cnfg); + o_spareEnable = (TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE != cnfg); }while(0); @@ -1303,12 +1331,22 @@ uint32_t isSpareAvailable( TARGETING::TargetHandle_t i_trgt, MemRank i_rank, if ( !dramSparingEnabled ) break; // Get the current spares in hardware + TargetHandle_t steerTrgt = i_trgt; MemSymbol sp0, sp1, ecc; - o_rc = mssGetSteerMux<T>( i_trgt, i_rank, sp0, sp1, ecc ); + if ( TYPE_MEM_PORT == T ) + { + steerTrgt = getConnectedParent( i_trgt, TYPE_OCMB_CHIP ); + o_rc = mssGetSteerMux<TYPE_OCMB_CHIP>( steerTrgt, i_rank, sp0, sp1, + ecc ); + } + else + { 
+ o_rc = mssGetSteerMux<T>( steerTrgt, i_rank, sp0, sp1, ecc ); + } if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed", - getHuid(i_trgt), i_rank.getKey() ); + getHuid(steerTrgt), i_rank.getKey() ); break; } @@ -1353,6 +1391,10 @@ template uint32_t isSpareAvailable<TYPE_MBA>( TARGETING::TargetHandle_t i_trgt, MemRank i_rank, uint8_t i_ps, bool & o_spAvail, bool & o_eccAvail ); +template +uint32_t isSpareAvailable<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, + MemRank i_rank, uint8_t i_ps, bool & o_spAvail, bool & o_eccAvail ); + //------------------------------------------------------------------------------ template<> diff --git a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H index 5d41d96e0..203703b42 100755 --- a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H +++ b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H @@ -193,7 +193,7 @@ bool obusInSmpMode(TARGETING::TargetHandle_t obusTgt); /** * @brief Reads the bad DQ bitmap attribute for both ports of the target rank. - * @param i_trgt A MCA/MBA/MEM_PORT target. + * @param i_trgt A MCA/MBA/MEM_PORT/OCMB_CHIP target. * @param i_rank Target rank. * @param o_bitmap DQ bitmap container. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. @@ -203,7 +203,7 @@ uint32_t getBadDqBitmap( TARGETING::TargetHandle_t i_trgt, /** * @brief Writes the bad DQ bitmap attribute for both ports of the target rank. - * @param i_trgt A MCA/MBA/MEM_PORT target. + * @param i_trgt A MCA/MBA/MEM_PORT/OCMB_CHIP target. * @param i_rank Target rank. * @param i_bitmap DQ bitmap container. * @note This is a no-op if DRAM Repairs are disabled in manufacturing. @@ -215,7 +215,7 @@ uint32_t setBadDqBitmap( TARGETING::TargetHandle_t i_trgt, /** * @brief Clears the bad DQ bitmap attribute for all ports of the target rank. - * @param i_trgt A MCA/MBA/MEM_PORT target. 
+ * @param i_trgt A MCA/MBA/MEM_PORT/OCMB_CHIP target. * @param i_rank Target rank. * @note This is a no-op if DRAM Repairs are disabled in manufacturing. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. diff --git a/src/usr/diag/prdf/common/plat/prdfRasServices_common.C b/src/usr/diag/prdf/common/plat/prdfRasServices_common.C index 3f9ba2322..2742286b3 100755 --- a/src/usr/diag/prdf/common/plat/prdfRasServices_common.C +++ b/src/usr/diag/prdf/common/plat/prdfRasServices_common.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -891,12 +891,21 @@ void ErrDataService::deallocateDimms( const SDC_MRU_LIST & i_mruList ) for ( SDC_MRU_LIST::const_iterator it = i_mruList.begin(); it != i_mruList.end(); ++it ) { + PRDcallout thiscallout = it->callout; if ( PRDcalloutData::TYPE_TARGET == thiscallout.getType() ) { TargetHandle_t calloutTgt = thiscallout.getTarget(); TYPE tgtType = getTargetType( calloutTgt ); + #ifdef CONFIG_NVDIMM + // If the MRU's gard policy is set to NO_GARD, skip it. + if ( NO_GARD == it->gardState && isNVDIMM(calloutTgt) ) + { + continue; + } + #endif + if ( TYPE_L4 == tgtType ) { calloutTgt = getConnectedParent( calloutTgt, TYPE_MEMBUF ); @@ -932,7 +941,17 @@ void ErrDataService::deallocateDimms( const SDC_MRU_LIST & i_mruList ) dimm != dimms.end(); ++dimm ) { if ( TYPE_DIMM == getTargetType(*dimm) ) + { + #ifdef CONFIG_NVDIMM + // If the MRU's gard policy is set to NO_GARD, skip it. 
+ if ( NO_GARD == it->gardState && isNVDIMM(*dimm) ) + { + continue; + } + #endif + dimmList.push_back(*dimm); + } } } } diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.C b/src/usr/diag/prdf/common/plat/prdfTargetServices.C index 65f8b9cdc..d34c980ad 100755 --- a/src/usr/diag/prdf/common/plat/prdfTargetServices.C +++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.C @@ -365,6 +365,20 @@ TARGETING::MODEL getChipModel( TARGETING::TargetHandle_t i_trgt ) //------------------------------------------------------------------------------ +#ifdef __HOSTBOOT_MODULE +uint32_t getChipId( TARGETING::TargetHandle_t i_trgt ) +{ + PRDF_ASSERT( NULL != i_trgt ); + + TargetHandle_t parent = getParentChip( i_trgt ); + PRDF_ASSERT( NULL != parent ); + + return parent->getAttr<ATTR_CHIP_ID>(); +} +#endif + +//------------------------------------------------------------------------------ + uint8_t getChipLevel( TARGETING::TargetHandle_t i_trgt ) { PRDF_ASSERT( NULL != i_trgt ); @@ -566,6 +580,7 @@ TargetService::ASSOCIATION_TYPE getAssociationType( TargetHandle_t i_target, { TYPE_MC, TYPE_PROC, TargetService::PARENT_BY_AFFINITY }, { TYPE_MC, TYPE_MI, TargetService::CHILD_BY_AFFINITY }, { TYPE_MC, TYPE_OMIC, TargetService::CHILD_BY_AFFINITY }, + { TYPE_MC, TYPE_MCC, TargetService::CHILD_BY_AFFINITY }, { TYPE_MC, TYPE_DMI, TargetService::CHILD_BY_AFFINITY }, { TYPE_MC, TYPE_DIMM, TargetService::CHILD_BY_AFFINITY }, @@ -579,13 +594,16 @@ TargetService::ASSOCIATION_TYPE getAssociationType( TargetHandle_t i_target, { TYPE_OMIC, TYPE_OMI, TargetService::CHILD_BY_AFFINITY }, { TYPE_MCC, TYPE_PROC, TargetService::PARENT_BY_AFFINITY }, + { TYPE_MCC, TYPE_MC, TargetService::PARENT_BY_AFFINITY }, { TYPE_MCC, TYPE_MI, TargetService::PARENT_BY_AFFINITY }, { TYPE_MCC, TYPE_OMI, TargetService::CHILD_BY_AFFINITY }, + { TYPE_MCC, TYPE_OCMB_CHIP, TargetService::CHILD_BY_AFFINITY }, { TYPE_OMI, TYPE_OMIC, TargetService::PARENT_BY_AFFINITY }, { TYPE_OMI, TYPE_MCC, 
TargetService::PARENT_BY_AFFINITY }, { TYPE_OMI, TYPE_OCMB_CHIP, TargetService::CHILD_BY_AFFINITY }, + { TYPE_OCMB_CHIP, TYPE_MCC, TargetService::PARENT_BY_AFFINITY }, { TYPE_OCMB_CHIP, TYPE_OMI, TargetService::PARENT_BY_AFFINITY }, { TYPE_OCMB_CHIP, TYPE_MEM_PORT,TargetService::CHILD_BY_AFFINITY }, { TYPE_OCMB_CHIP, TYPE_DIMM, TargetService::CHILD_BY_AFFINITY }, @@ -648,14 +666,30 @@ TargetHandleList getConnAssoc( TargetHandle_t i_target, TYPE i_connType, TargetHandleList o_list; // Default empty list - // Match any class, specified type, and functional. - PredicateCTM predType( CLASS_NA, i_connType ); - PredicateIsFunctional predFunc; - PredicatePostfixExpr predAnd; - predAnd.push(&predType).push(&predFunc).And(); + TYPE trgtType = getTargetType( i_target ); - targetService().getAssociated( o_list, i_target, i_assocType, - TargetService::ALL, &predAnd ); + // OMIC -> OMI and vice versa require special handling. + if ( TYPE_OMIC == trgtType && TYPE_OMI == i_connType ) + { + getChildOmiTargetsByState( o_list, i_target, CLASS_NA, TYPE_OMI, + UTIL_FILTER_FUNCTIONAL ); + } + else if ( TYPE_OMI == trgtType && TYPE_OMIC == i_connType ) + { + getParentOmicTargetsByState( o_list, i_target, CLASS_NA, TYPE_OMIC, + UTIL_FILTER_FUNCTIONAL ); + } + else + { + // Match any class, specified type, and functional. + PredicateCTM predType( CLASS_NA, i_connType ); + PredicateIsFunctional predFunc; + PredicatePostfixExpr predAnd; + predAnd.push(&predType).push(&predFunc).And(); + + targetService().getAssociated( o_list, i_target, i_assocType, + TargetService::ALL, &predAnd ); + } // Sort by target position. 
std::sort( o_list.begin(), o_list.end(), @@ -866,6 +900,17 @@ TargetHandle_t getConnectedChild( TargetHandle_t i_target, TYPE i_connType, (i_connPos == (miPos % MAX_MI_PER_MC)); } ); } + else if ( TYPE_MC == trgtType && TYPE_MCC == i_connType ) + { + // i_connPos is position relative to MC (0-3) + itr = std::find_if( list.begin(), list.end(), + [&](const TargetHandle_t & t) + { + uint32_t mccPos = getTargetPosition(t); + return (trgtPos == (mccPos / MAX_MCC_PER_MC)) && + (i_connPos == (mccPos % MAX_MCC_PER_MC)); + } ); + } else if ( TYPE_MC == trgtType && TYPE_DMI == i_connType ) { // i_connPos is position relative to MC (0-3) @@ -929,6 +974,17 @@ TargetHandle_t getConnectedChild( TargetHandle_t i_target, TYPE i_connType, (i_connPos == (omiPos % MAX_OMI_PER_MCC)); } ); } + else if ( TYPE_MCC == trgtType && TYPE_OCMB_CHIP == i_connType ) + { + // i_connPos is position relative to MCC (0-1) + itr = std::find_if( list.begin(), list.end(), + [&](const TargetHandle_t & t) + { + uint32_t ocmbPos = getTargetPosition(t); + return (trgtPos == (ocmbPos / MAX_OCMB_PER_MCC)) && + (i_connPos == (ocmbPos % MAX_OCMB_PER_MCC)); + } ); + } else if ( TYPE_MC == trgtType && TYPE_OMIC == i_connType ) { // i_connPos is position relative to MC (0-2) @@ -943,13 +999,17 @@ TargetHandle_t getConnectedChild( TargetHandle_t i_target, TYPE i_connType, else if ( TYPE_OMIC == trgtType && TYPE_OMI == i_connType ) { // i_connPos is position relative to OMIC (0-2) - itr = std::find_if( list.begin(), list.end(), - [&](const TargetHandle_t & t) - { - uint32_t omiPos = getTargetPosition(t); - return (trgtPos == (omiPos / MAX_OMI_PER_OMIC)) && - (i_connPos == (omiPos % MAX_OMI_PER_OMIC)); - } ); + for ( TargetHandleList::iterator trgtIt = list.begin(); + trgtIt != list.end(); trgtIt++ ) + { + uint8_t omiPos = 0; + if ( (*trgtIt)->tryGetAttr<ATTR_OMI_DL_GROUP_POS>(omiPos) && + (i_connPos == omiPos) ) + { + itr = trgtIt; + break; + } + } } else if ( TYPE_PROC == trgtType && TYPE_NPU == i_connType ) { @@ 
-991,7 +1051,12 @@ ExtensibleChipList getConnected( ExtensibleChip * i_chip, TYPE i_connType ) TargetHandleList list = getConnected( i_chip->getTrgt(), i_connType ); for ( auto & trgt : list ) { - o_list.push_back( (ExtensibleChip *)systemPtr->GetChip(trgt) ); + // Check to make sure that if we have a non-null Target, we also + // get back a non-null ExtensibleChip. + ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip(trgt); + PRDF_ASSERT( nullptr != chip ); + + o_list.push_back( chip ); } return o_list; @@ -1007,7 +1072,12 @@ ExtensibleChip * getConnectedParent( ExtensibleChip * i_child, TargetHandle_t trgt = getConnectedParent( i_child->getTrgt(), i_parentType ); - return (ExtensibleChip *)systemPtr->GetChip( trgt ); + // Check to make sure that if we have a non-null Target, we also + // get back a non-null ExtensibleChip. + ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip( trgt ); + PRDF_ASSERT( nullptr != chip ); + + return chip; } //------------------------------------------------------------------------------ @@ -1026,6 +1096,10 @@ ExtensibleChip * getConnectedChild( ExtensibleChip * i_parent, if ( nullptr != trgt ) { o_child = (ExtensibleChip *)systemPtr->GetChip( trgt ); + + // Check to make sure that if we have a non-null Target, we also + // get back a non-null ExtensibleChip. 
+ PRDF_ASSERT( nullptr != o_child ); } return o_child; @@ -1471,7 +1545,9 @@ bool isDramWidthX4( TargetHandle_t i_trgt ) bool o_dramWidthX4 = false; PRDF_ASSERT( nullptr != i_trgt ); - //uint8_t dramWidths = 0; + uint8_t dramWidths[MAX_DIMM_PER_PORT]; + uint8_t dimmSlct = 0; + TargetHandle_t memPort = nullptr; switch ( getTargetType(i_trgt) ) { @@ -1485,12 +1561,17 @@ bool isDramWidthX4( TargetHandle_t i_trgt ) break; case TYPE_DIMM: - // TODO RTC 207273 - attribute not in TARGETING code yet - //TargetHandle_t memPort = getConnectedParent(i_trgt, TYPE_MEM_PORT); - //dramWidths = memPort->getAttr<ATTR_MEM_EFF_DRAM_WIDTH>(); - //uint8_t dimmSlct = getDimmSlct( i_trgt ); - //o_dramWidthX4 = - // (fapi2::ENUM_ATTR_MEM_EFF_DRAM_WIDTH_X4 == dramWidths[dimmSlct]); + memPort = getConnectedParent(i_trgt, TYPE_MEM_PORT); + if ( !memPort->tryGetAttr<ATTR_MEM_EFF_DRAM_WIDTH>(dramWidths) ) + { + PRDF_ERR( "isDramWidthX4: Unable to access " + "ATTR_MEM_EFF_DRAM_WIDTH i_trgt=0x%08x.", + getHuid(memPort) ); + PRDF_ASSERT( false ); + } + dimmSlct = getDimmSlct( i_trgt ); + o_dramWidthX4 = + (TARGETING::MEM_EFF_DRAM_WIDTH_X4 == dramWidths[dimmSlct]); break; default: @@ -1538,15 +1619,12 @@ void __getMasterRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, } else if ( MODEL_AXONE == l_procModel ) { - PRDF_ERR( PRDF_FUNC "Axone attribute not supported yet" ); - /* TODO RTC 207273 - no targeting support for attr yet if ( !i_trgt->tryGetAttr<ATTR_MEM_EFF_DIMM_RANKS_CONFIGED>(info[0]) ) { PRDF_ERR( PRDF_FUNC "tryGetAttr<ATTR_MEM_EFF_DIMM_RANKS_CONFIGED> " "failed: i_trgt=0x%08x", getHuid(i_trgt) ); PRDF_ASSERT( false ); // attribute does not exist for target } - */ } else { @@ -1605,17 +1683,21 @@ void getMasterRanks<TYPE_MBA>( TargetHandle_t i_trgt, } template<> -void getMasterRanks<TYPE_MEM_PORT>( TargetHandle_t i_trgt, - std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getMasterRanks<TYPE_MEM_PORT>( i_trgt, o_ranks, 0, i_ds ); +void getMasterRanks<TYPE_OCMB_CHIP>( 
TargetHandle_t i_trgt, + std::vector<MemRank> & o_ranks, + uint8_t i_ds ) +{ + // TODO RTC 210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. + TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + __getMasterRanks<TYPE_MEM_PORT>( memPort, o_ranks, 0, i_ds ); } //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -void __getSlaveRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, +void getSlaveRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, uint8_t i_ds ) { PRDF_ASSERT( nullptr != i_trgt ); @@ -1656,29 +1738,18 @@ void __getSlaveRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, } } -template<> +template void getSlaveRanks<TYPE_MCA>( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getSlaveRanks<TYPE_MCA>( i_trgt, o_ranks, i_ds ); -} - -template<> + uint8_t i_ds ); +template void getSlaveRanks<TYPE_MBA>( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getSlaveRanks<TYPE_MBA>( i_trgt, o_ranks, i_ds ); -} - -template<> -void getSlaveRanks<TYPE_MEM_PORT>( TargetHandle_t i_trgt, - std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getSlaveRanks<TYPE_MEM_PORT>( i_trgt, o_ranks, i_ds ); -} + uint8_t i_ds ); +template +void getSlaveRanks<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + std::vector<MemRank> & o_ranks, + uint8_t i_ds ); //------------------------------------------------------------------------------ @@ -1774,12 +1845,15 @@ uint8_t getNumMasterRanksPerDimm<TYPE_MBA>( TargetHandle_t i_trgt, } template<> -uint8_t getNumMasterRanksPerDimm<TYPE_MEM_PORT>( TargetHandle_t i_trgt, - uint8_t i_ds ) -{ - return __getNumMasterRanksPerDimm<TYPE_MEM_PORT>( i_trgt, 0, i_ds ); +uint8_t getNumMasterRanksPerDimm<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + uint8_t i_ds ) +{ + // TODO RTC 
210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. + TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + return __getNumMasterRanksPerDimm<TYPE_MEM_PORT>( memPort, 0, i_ds ); } - //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1822,10 +1896,10 @@ uint8_t __getNumRanksPerDimm( TargetHandle_t i_trgt, } else if ( MODEL_AXONE == l_procModel ) { - ATTR_MEM_EFF_NUM_RANKS_PER_DIMM_type attr; - if ( !i_trgt->tryGetAttr<ATTR_MEM_EFF_NUM_RANKS_PER_DIMM>(attr) ) + ATTR_MEM_EFF_LOGICAL_RANKS_PER_DIMM_type attr; + if ( !i_trgt->tryGetAttr<ATTR_MEM_EFF_LOGICAL_RANKS_PER_DIMM>(attr) ) { - PRDF_ERR( PRDF_FUNC "tryGetAttr<ATTR_MEM_EFF_NUM_RANKS_PER_DIMM> " + PRDF_ERR( PRDF_FUNC "tryGetAttr<ATTR_MEM_EFF_LOGICAL_RANKS_PER_DIMM> " "failed: i_trgt=0x%08x", getHuid(i_trgt) ); PRDF_ASSERT( false ); // attribute does not exist for target } @@ -1869,9 +1943,13 @@ uint8_t getNumRanksPerDimm<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_ds ) } template<> -uint8_t getNumRanksPerDimm<TYPE_MEM_PORT>( TargetHandle_t i_trgt, uint8_t i_ds ) +uint8_t getNumRanksPerDimm<TYPE_OCMB_CHIP>(TargetHandle_t i_trgt, uint8_t i_ds) { - return __getNumRanksPerDimm<TYPE_MEM_PORT>( i_trgt, 0, i_ds ); + // TODO RTC 210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. 
+ TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + return __getNumRanksPerDimm<TYPE_MEM_PORT>( memPort, 0, i_ds ); } //############################################################################## diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.H b/src/usr/diag/prdf/common/plat/prdfTargetServices.H index 8793e8c61..34af865d7 100755 --- a/src/usr/diag/prdf/common/plat/prdfTargetServices.H +++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.H @@ -42,6 +42,10 @@ #include <targeting/common/target.H> #include <prdfParserEnums.H> +#ifdef __HOSTBOOT_MODULE + #include <chipids.H> +#endif + //------------------------------------------------------------------------------ namespace PRDF @@ -145,6 +149,20 @@ TARGETING::CLASS getTargetClass( TARGETING::TargetHandle_t i_target ); */ TARGETING::MODEL getChipModel( TARGETING::TargetHandle_t i_trgt ); +#ifdef __HOSTBOOT_MODULE + +// NOTE: This should be used instead of getChipModel() because of the case of +// MODEL_OCMB, where we need the chip ID to distinguish between Explorer +// and Gemini. + +/** + * @param i_trgt A chip target or any unit target within the chip. + * @return The chip ID. + */ +uint32_t getChipId( TARGETING::TargetHandle_t i_trgt ); + +#endif + /** * @param i_trgt A chip target or any unit target within the chip. * @return The level (EC level) of a chip. @@ -293,7 +311,7 @@ TARGETING::TargetHandle_t getConnectedPeerTarget( TARGETING::TargetHandle_t i_tgt); /** - * @param i_trgt The target MBA, MCA, or MEM_PORT. + * @param i_trgt The target MBA, MCA, OCMB_CHIP, or MEM_PORT. * @param i_rank The target rank. * @return A list of DIMMs connected to the target and rank. */ @@ -301,10 +319,10 @@ TARGETING::TargetHandleList getConnectedDimms( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank ); /** - * @param i_trgt The target MBA, MCA, or MEM_PORT. + * @param i_trgt The target MBA, MCA, OCMB_CHIP, or MEM_PORT. * @param i_rank The target rank. 
- * @param i_port Port select, only needed for MBA. MCA and MEM_PORT are - * targets equivalent to the port already. + * @param i_port Port select, only needed for MBA and OCMB_CHIP. MCA and + * MEM_PORT are targets equivalent to the port already. * @return The DIMM connected to the target and rank on a port. */ TARGETING::TargetHandle_t getConnectedDimm( TARGETING::TargetHandle_t i_trgt, @@ -434,7 +452,7 @@ uint8_t getColNumConfig( TARGETING::TargetHandle_t i_trgt ); /** * @brief Returns a sorted list of configured master ranks for an MCA or MBA. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param o_ranks The returned list. * @param i_ds When used, this function will only return the list of ranks * for the target DIMM select. Otherwise, the default is to @@ -450,7 +468,7 @@ void getMasterRanks( TARGETING::TargetHandle_t i_trgt, /** * @brief Returns a sorted list of configured slave ranks for an MCA or MBA. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param o_ranks The returned list. * @param i_ds When used, this function will only return the list of ranks * for the target DIMM select. Otherwise, the default is to @@ -466,7 +484,7 @@ void getSlaveRanks( TARGETING::TargetHandle_t i_trgt, /** * @brief Obtains the number of master ranks per DIMM select. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param i_ds DIMM select. * @return Total number of master ranks configured per DIMM select. */ @@ -477,7 +495,7 @@ uint8_t getNumMasterRanksPerDimm( TARGETING::TargetHandle_t i_trgt, /** * @brief Obtains the total number of ranks (including slave ranks) per DIMM * select. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param i_ds DIMM select. * @return Total number of ranks configured per DIMM select. */ |