diff options
7 files changed, 123 insertions, 58 deletions
diff --git a/src/usr/diag/prdf/common/plat/pegasus/Mba.rule b/src/usr/diag/prdf/common/plat/pegasus/Mba.rule index d038cf24a..7447f7dd3 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Mba.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Mba.rule @@ -438,7 +438,7 @@ rule MbaFir RECOVERABLE: MBAFIR & ~MBAFIR_MASK & ~MBAFIR_ACT0 & MBAFIR_ACT1; }; -group gMbaFir filter singlebit, secondarybits( 15, 16 ) +group gMbaFir filter singlebit, secondarybits(0,1,2,4,9,10,11,12,13,14,15,16) { /** MBAFIR[0] * MBAFIRQ_INVALID_MAINT_CMD @@ -560,7 +560,7 @@ rule MbaDdrPhyFir MBADDRPHYFIR & ~MBADDRPHYFIR_MASK & ~MBADDRPHYFIR_ACT0 & MBADDRPHYFIR_ACT1; }; -group gMbaDdrPhyFir filter singlebit, secondarybits( 53 ) +group gMbaDdrPhyFir filter singlebit, secondarybits(50,53,54,55,58) { /** MBADDRPHYFIR[48] * DDRPHY_FIR_REG_DDR0_FSM_CKSTP @@ -638,7 +638,7 @@ rule MbaCalFir # bits 4 and 7 given priority to check for RCD parity error before potential # side effects in bits 2 and 17 group gMbaCalFir filter priority( 4, 7 ), - secondarybits( 10, 14, 19, 20, 21, 24, 25 ) + secondarybits(3,10,12,14,16,19,20,21,22,23,24,25) { /** MBACALFIR[0] * MBACALFIRQ_MBA_RECOVERABLE_ERROR diff --git a/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule b/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule index e70bb0349..f56bb9c60 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2012,2015 +# Contributors Listed Below - COPYRIGHT 2012,2016 # [+] International Business Machines Corp. # # @@ -200,7 +200,8 @@ rule MciFir group gMciFir attntype CHECK_STOP, RECOVERABLE, SPECIAL, UNIT_CS filter priority( 8, 9, 22, 23, 6, 0, 40, # Channel failure 20, 5, 10 ), # Recoverable - secondarybits( 32 ) + secondarybits(1,2,3,4,5,7,10,11,12,13,14,15,16,17,18,19,20,21,24, + 31,32,34,35,37,41,42,43,44,45,47,50,51,52,53) { /** MCIFIR[0] * MCIFIRQ_REPLAY_TIMEOUT diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_MEM.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_MEM.rule index 5f836129f..f069dc465 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_MEM.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_MEM.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2012,2015 +# Contributors Listed Below - COPYRIGHT 2012,2016 # [+] International Business Machines Corp. # # @@ -120,7 +120,10 @@ rule MemLFir RECOVERABLE: MEM_LFIR & ~MEM_LFIR_MASK & ~MEM_LFIR_ACT0 & MEM_LFIR_ACT1; }; -group gMemLFir filter singlebit, secondarybits( 0 ) +group gMemLFir filter singlebit, + secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, + 18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, + 33,34,35,36,37,38,39,40) { /** MEM_LFIR[0] * CFIR internal parity error diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule index f3b5b6077..daf355e2a 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2012,2015 +# Contributors Listed Below - COPYRIGHT 2012,2016 # [+] International Business Machines Corp. # # @@ -101,7 +101,10 @@ rule NestLFir RECOVERABLE: NEST_LFIR & ~NEST_LFIR_MASK & ~NEST_LFIR_ACT0 & NEST_LFIR_ACT1; }; -group gNestLFir filter singlebit, secondarybits( 0 ) +group gNestLFir filter singlebit, + secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, + 17,18,19,20,21,22,23,24,25,26,27,28,29,30, + 31,32,33,34,35,36,37,38,39,40) { /** NEST_LFIR[0] * CFIR internal parity error @@ -173,7 +176,9 @@ rule DmiFir group gDmiFir filter priority( 10, # Channel failure 2, 11, 12, 9 ), # Recoverable - secondarybits( 9, 48, 49 ) + secondarybits(0,1,3,4,5,6,7,8,9,11,12,13,14,15,16,17,18,19, + 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34, + 35,36,37,38,39,40,41,42,43,44,45,46,47,48,49) { /** DMIFIR[0] * FIR_RX_INVALID_STATE_OR_PARITY_ERROR @@ -267,7 +272,10 @@ rule ScacFir RECOVERABLE: SCACFIR & ~SCACFIR_MASK & ~SCACFIR_ACT0 & SCACFIR_ACT1; }; -group gScacFir filter singlebit, secondarybits( 35, 36 ) +group gScacFir filter singlebit, + secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, + 18,19,20,21,22,23,24,27,28,29,30,31,32,33, + 34,35,36) { /** SCACFIR[0] * SCAC_LFIR_I2CMINVADDR @@ -429,7 +437,8 @@ rule MbiFir group gMbiFir filter priority( 8, 9, 19, 20, 6, 0, # Channel failure 16, 5, 10 ), # Recoverable - secondarybits( 25, 26 ) + secondarybits(1,2,3,4,5,7,10,11,12,13,14,15,16,17,18,21,22, + 23,24,25,26) { /** MBIFIR[0] * MBIFIRQ_REPLAY_TIMEOUT @@ -569,7 +578,9 @@ rule MbsFir RECOVERABLE: MBSFIR & ~MBSFIR_MASK & ~MBSFIR_ACT0 & MBSFIR_ACT1; }; -group gMbsFir filter singlebit, secondarybits( 3, 9, 12, 26, 29, 33, 34 ) +group gMbsFir filter singlebit, + secondarybits(3,5,7,9,11,12,14,15,17,19,21,22,23,24,25,26, + 28,29,31,32,33,34) { /** MBSFIR[0] * MBS_FIR_REG_HOST_PROTOCOL_ERROR @@ -599,7 +610,10 @@ group gMbsFir filter singlebit, secondarybits( 3, 9, 12, 26, 29, 33, 34 ) /** MBSFIR[3,4] * MBS_FIR_REG_EXTERNAL_TIMEOUT */ - (MbsFir, bit(3,4)) ? callout2ndLvlMedThr1; + # NOTE: The signature will match the external timeout, but we will still + # call the internalTimeout plugin because there is extra processing + # done to handle parity errors and such. + (MbsFir, bit(3,4)) ? internalTimeout; /** MBSFIR[5] * MBS_FIR_REG_INT_BUFFER_CE @@ -782,9 +796,10 @@ rule Mba1_MbsEccFir }; group gMbsEccFir filter priority ( 19, 41 ), - secondarybits( 0, 1, 2, 3, 4, 5, 6, 7, - 20, 21, 22, 23, 24, 25, 26, 27, - 16, 17, 43, 48, 50, 51 ) + secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, + 17,18,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45, + 48,50,51) { /** MBA0_MBSECCFIR[0] * Memory chip mark on rank 0 @@ -1122,7 +1137,8 @@ rule Mba1_McbistFir ~MBA1_MCBISTFIR_ACT0 & MBA1_MCBISTFIR_ACT1; }; -group gMcbistFir filter singlebit, secondarybits( 15, 16 ) +group gMcbistFir filter singlebit, + secondarybits(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16) { /** MBA0_MCBISTFIR[0] * MBSFIRQ_SCOM_PAR_ERRORS @@ -1273,7 +1289,8 @@ actionclass replayTimeOutError /** Handles MBACAL parity err if present, else handles MBS Internal Timeout */ actionclass internalTimeout { - try( funccall("handleMbaCalParityErr"), SelfMedThr1 ); + threshold1; + funccall("internalTimeout"); # must be called last so rc is passed on }; /** Handles MBACAL parity err if present, else handles MBA0 MBSECC Memory UE */ diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_TP.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_TP.rule index cec1fddd6..1b2a18f85 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_TP.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_TP.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2012,2015 +# Contributors Listed Below - COPYRIGHT 2012,2016 # [+] International Business Machines Corp. # # @@ -57,7 +57,10 @@ rule TpLFir RECOVERABLE: TP_LFIR & ~TP_LFIR_MASK & ~TP_LFIR_ACT0 & TP_LFIR_ACT1; }; -group gTpLFir filter singlebit, secondarybits( 0, 13, 14 ) +group gTpLFir filter singlebit, + secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, + 18,21,22,23,24,25,26,27,28,29,30,31,32,33,34, + 35,36,37,38,39,40) { /** TP_LFIR[0] * CFIR internal parity error diff --git a/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule b/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule index 2c5ff6b12..ab810703d 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2012,2015 +# Contributors Listed Below - COPYRIGHT 2012,2016 # [+] International Business Machines Corp. # # @@ -3065,11 +3065,17 @@ rule IomcFir_0 RECOVERABLE: IOMCFIR_0 & ~IOMCFIR_0_MASK & ~IOMCFIR_0_ACT0 & IOMCFIR_0_ACT1; }; -group gIomcFir_0 filter priority( 10, 18, 26, 34, # Channel failure - 2, # Recoverable - 11, 19, 27, 35, # Recoverable - 12, 20, 28, 36, # Recoverable - 9, 17, 25, 33 ) # Recoverable +group gIomcFir_0 filter priority( 10, 18, 26, 34, # Channel failure + 2, # Recoverable + 11, 19, 27, 35, # Recoverable + 12, 20, 28, 36, # Recoverable + 9, 17, 25, 33 ), # Recoverable + secondarybits( 0, 1, 3, 4, 5, 6, 7, + 8, 9,11,12,13,14,15, + 16,17,19,20,21,22,23, + 24,25,27,28,29,30,31, + 32,33,35,36,37,38,39, + 40,41,43,44,45,46,47,48,49) { /** IOMCFIR_0[0] * FIR_RX_INVALID_STATE_OR_PARITY_ERROR @@ -3238,11 +3244,17 @@ rule IomcFir_1 RECOVERABLE: IOMCFIR_1 & ~IOMCFIR_1_MASK & ~IOMCFIR_1_ACT0 & IOMCFIR_1_ACT1; }; -group gIomcFir_1 filter priority( 10, 18, 26, 34, # Channel failure - 2, # Recoverable - 11, 19, 27, 35, # Recoverable - 12, 20, 28, 36, # Recoverable - 9, 17, 25, 33 ) # Recoverable +group gIomcFir_1 filter priority( 10, 18, 26, 34, # Channel failure + 2, # Recoverable + 11, 19, 27, 35, # Recoverable + 12, 20, 28, 36, # Recoverable + 9, 17, 25, 33 ), # Recoverable + secondarybits( 0, 1, 3, 4, 5, 6, 7, + 8, 9,11,12,13,14,15, + 16,17,19,20,21,22,23, + 24,25,27,28,29,30,31, + 32,33,35,36,37,38,39, + 40,41,43,44,45,46,47,48,49) { /** IOMCFIR_1[0] * FIR_RX_INVALID_STATE_OR_PARITY_ERROR diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C index a7c977294..cfc7a5cfa 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2015 */ +/* Contributors Listed Below - COPYRIGHT 2012,2016 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -1243,41 +1243,70 @@ int32_t handleSingleMbaCalParityErr( ExtensibleChip * i_membChip, //------------------------------------------------------------------------------ /** - * @brief Handles MBACALFIR RCD Parity error bits, if they exist. - * - * @param i_membChip The Centaur chip. - * @param i_sc ServiceDataCollector. - * - * @return SUCCESS if MBACALFIR Parity error is present and properly - * handled, FAIL otherwise. + * @brief MBSFIR[4] - Internal Timeout error. + * @param i_mbChip The Centaur chip + * @param i_sc Step code data struct + * @return Non-SUCCESS if analysis fails. SUCCESS otherwise. */ -int32_t handleMbaCalParityErr( ExtensibleChip * i_membChip, - STEP_CODE_DATA_STRUCT & i_sc ) +int32_t internalTimeout( ExtensibleChip * i_mbChip, + STEP_CODE_DATA_STRUCT & i_sc ) { - #define PRDF_FUNC "[handleMbaCalParityErr] " + #define PRDF_FUNC "[internalTimeout] " - // We will return FAIL from this function if MBACALFIR parity error bits are - // not set. If MBACALFIR parity error bits are set, we will try to analyze - // the MBACALFIR. If MBACALFIR is not analyzed properly, we will return - // FAIL. This will trigger rule code to execute alternate resolution. - - int32_t l_rc; + int32_t o_rc = SUCCESS; - // We will loop through to check all MBA if necessary until one is found - // with parity error bits set - for ( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++) + do { - l_rc = SUCCESS; + // First, check if there are any MBACALFIR parity errors. + for ( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++) + { + o_rc = handleSingleMbaCalParityErr( i_mbChip, i_sc, i ); - l_rc = handleSingleMbaCalParityErr( i_membChip, i_sc, i ); + // If SUCCESS is returned, then there was a parity error and + // analysis was successful. + if ( SUCCESS == o_rc ) break; + } + if ( SUCCESS == o_rc ) break; // nothing more to do. - if ( SUCCESS == l_rc ) break; - } + // Next, check if there was an MBSFIR external timeout. + SCAN_COMM_REGISTER_CLASS * fir = i_mbChip->getRegister("MBSFIR"); + o_rc = fir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "failed to read MBSFIR on 0x%08x", + i_mbChip->GetId() ); + break; + } + + if ( fir->IsBitSet(3) ) + { + if ( CHECK_STOP == i_sc.service_data->getPrimaryAttnType() ) + { + // In this case, we do not want the internal timeout to be + // blamed as the root cause of the checkstop. So move onto the + // next FIR bit. + o_rc = PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Make the callout of the external timeout error. + i_sc.service_data->SetCallout( NextLevelSupport_ENUM, + MRU_MED, NO_GARD ); + } + } + else + { + // The internal timeout error is on by itself. + i_sc.service_data->SetCallout( i_mbChip->GetChipHandle(), MRU_MED ); + } + + } while (0); + + return o_rc; - return l_rc; #undef PRDF_FUNC -} PRDF_PLUGIN_DEFINE( Membuf, handleMbaCalParityErr ); +} PRDF_PLUGIN_DEFINE( Membuf, internalTimeout ); //------------------------------------------------------------------------------ |