diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2018-06-22 16:31:24 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-06-28 10:57:28 -0400 |
commit | 6fd60cf786f0539fb2c11265ad6c6a9fb120988a (patch) | |
tree | 8e532e6bd67994fe0c8d54c85a4e5291a2459b21 | |
parent | b983851d8eb04664ca608b3a15b5b0de2fbf0644 (diff) | |
download | talos-hostboot-6fd60cf786f0539fb2c11265ad6c6a9fb120988a.tar.gz talos-hostboot-6fd60cf786f0539fb2c11265ad6c6a9fb120988a.zip |
PRD: Query for active attentions when channel fail detected
The HWP used for querying channel failure does not take into account
whether a FIR bit is masked or whether it is configured as UNIT_CS (this
omission is a bug in the HWP). Therefore, we must check for active
attentions after calling the HWP.
Change-Id: I46bd9413d8f17198b6c466be00dfbcfc487c2229
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/61221
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/61532
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
-rwxr-xr-x | src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C | 115 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_dmi_regs.rule | 8 |
2 files changed, 98 insertions, 25 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C index c28eba4cf..529351f35 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C @@ -521,10 +521,92 @@ uint32_t __queryChnlFail<TYPE_MEMBUF>( ExtensibleChip * i_chip, template<> uint32_t __queryChnlFail<TYPE_DMI>( ExtensibleChip * i_chip, bool & o_chnlFail ) { - // There is a HWP on the processor side that will query the CHIFIR, IOMCFIR, - // and associated configuration registers for a valid channel failure - // attention. - return PlatServices::queryChnlFail<TYPE_DMI>( i_chip, o_chnlFail ); + #define PRDF_FUNC "[MemUtils::__queryChnlFail] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_DMI == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + o_chnlFail = false; + + SCAN_COMM_REGISTER_CLASS * fir = nullptr; + SCAN_COMM_REGISTER_CLASS * mask = nullptr; + SCAN_COMM_REGISTER_CLASS * act0 = nullptr; + SCAN_COMM_REGISTER_CLASS * act1 = nullptr; + + do + { + // There is a HWP on the processor side that will query if this channel + // has failed. Unfortunately, it does not check for an active channel + // fail attention (i.e. not masked). That will need to be done + // afterwards. + bool tmpChnlFail = false; + o_rc = PlatServices::queryChnlFail<TYPE_DMI>( i_chip, tmpChnlFail ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Failed to read GLOBAL_CS_FIR on 0x%08x", + i_chip->getHuid() ); + break; + } + if ( !tmpChnlFail ) break; // nothing more to do. + + // Check for an active attention on the CHIFIR. 
+ fir = i_chip->getRegister( "CHIFIR" ); + mask = i_chip->getRegister( "CHIFIR_MASK" ); + act0 = i_chip->getRegister( "CHIFIR_ACT0" ); + act1 = i_chip->getRegister( "CHIFIR_ACT1" ); + o_rc = fir->Read() | mask->Read() | act0->Read() | act1->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Failed to read CHIFIRs on 0x%08x", + i_chip->getHuid() ); + break; + } + + if ( 0 != ( fir->GetBitFieldJustified( 0,64) & + ~mask->GetBitFieldJustified(0,64) & + act0->GetBitFieldJustified(0,64) & + act1->GetBitFieldJustified(0,64) ) ) + { + o_chnlFail = true; + break; // nothing more to do. + } + + // Check for an active attention on the IOMCFIR. + ExtensibleChip * mcChip = getConnectedParent( i_chip, TYPE_MC ); + uint32_t dmiPos = i_chip->getPos() % MAX_DMI_PER_MC; + uint32_t bitPos = 8 + dmiPos * 8; + + fir = mcChip->getRegister( "IOMCFIR" ); + mask = mcChip->getRegister( "IOMCFIR_MASK" ); + act0 = mcChip->getRegister( "IOMCFIR_ACT0" ); + act1 = mcChip->getRegister( "IOMCFIR_ACT1" ); + o_rc = fir->Read() | mask->Read() | act0->Read() | act1->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Failed to read IOMCFIRs on 0x%08x", + mcChip->getHuid() ); + break; + } + + if ( 0 != ( fir->GetBitFieldJustified( bitPos,8) & + ~mask->GetBitFieldJustified(bitPos,8) & + act0->GetBitFieldJustified(bitPos,8) & + act1->GetBitFieldJustified(bitPos,8) ) ) + { + o_chnlFail = true; + break; // nothing more to do. + } + + PRDF_INF( PRDF_FUNC "Failed channel detected on 0x%08x, but no active " + "attentions found", i_chip->getHuid() ); + + } while (0); + + return o_rc; + + #undef PRDF_FUNC } //------------------------------------------------------------------------------ @@ -666,27 +748,10 @@ void __cleanupChnlFail<TYPE_DMI,TYPE_MEMBUF>( ExtensibleChip * i_dmiChip, ExtensibleChip * mcChip = getConnectedParent( i_dmiChip, TYPE_MC ); uint32_t dmiPos = i_dmiChip->getPos() % MAX_DMI_PER_MC; - // Mask off all attentions from the DMI target in the chiplet FIRs. 
- reg = mcChip->getRegister( "MC_CHIPLET_FIR_MASK" ); - if ( SUCCESS == reg->Read() ) - { - reg->SetBit( 4 + (dmiPos * 2) ); // 4, 6, 8, 10 - reg->Write(); - } - - reg = mcChip->getRegister( "MC_CHIPLET_UCS_FIR_MASK" ); - if ( SUCCESS == reg->Read() ) - { - reg->SetBit( 0 + (dmiPos * 2) ); // 0, 2, 4, 6 (masks 1, 3, 5, 7) - reg->Write(); - } - - reg = mcChip->getRegister( "MC_CHIPLET_HA_FIR_MASK" ); - if ( SUCCESS == reg->Read() ) - { - reg->SetBit( 0 + (dmiPos * 2) ); // 0, 2, 4, 6 (masks 1, 3, 5, 7) - reg->Write(); - } + // Mask off all attentions from the DMI target in the CHIFIR. + reg = i_dmiChip->getRegister( "CHIFIR_MASK_OR" ); + reg->setAllBits(); + reg->Write(); // Mask off all attentions from the DMI target in the IOMCFIR. reg = mcChip->getRegister( "IOMCFIR_MASK_OR" ); diff --git a/src/usr/diag/prdf/common/plat/p9/p9_dmi_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_dmi_regs.rule index d3ad0f0c2..d681846d8 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_dmi_regs.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_dmi_regs.rule @@ -35,6 +35,14 @@ access write_only; }; + register CHIFIR_MASK_OR + { + name "P9 DMI target CHIFIR_MASK atomic OR"; + scomaddr 0x07010905; + capture group never; + access write_only; + }; + register MCICFG0 { name "MCI Configuration Register 0"; |