From 06d556e9b03c86d451b3a383f12834cb88302ae1 Mon Sep 17 00:00:00 2001 From: Zane Shelley Date: Mon, 23 Apr 2018 22:05:54 -0500 Subject: PRD: L4 line delete Change-Id: I41ddf8520c3124d2e9dabcbcae766d2e74239e56 RTC: 187477 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57707 Tested-by: Jenkins Server Reviewed-by: Benjamin J. Weisenbeck Reviewed-by: Caleb N. Palmer Reviewed-by: Matt Derksen Reviewed-by: Zane C. Shelley Squashed: I97a883994a99823c605c6a4af15a5d8bf7b575c6 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57883 CI-Ready: Zane C. Shelley Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW Tested-by: FSP CI Jenkins --- src/usr/diag/prdf/common/plat/cen/cen_centaur.rule | 8 +- .../prdf/common/plat/cen/cen_centaur_actions.rule | 68 +++++ .../prdf/common/plat/cen/cen_centaur_regs.rule | 18 ++ .../prdf/common/plat/pegasus/Membuf_acts_NEST.rule | 8 +- .../prdf/common/plat/pegasus/Membuf_regs_NEST.rule | 19 -- .../diag/prdf/common/plat/pegasus/prdfCenMembuf.C | 314 +-------------------- 6 files changed, 95 insertions(+), 340 deletions(-) create mode 100644 src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule (limited to 'src/usr/diag/prdf/common') diff --git a/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule b/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule index 37298d943..f75ff5791 100644 --- a/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule +++ b/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule @@ -1234,12 +1234,12 @@ group gMBSFIR filter singlebit, cs_root_cause( 0, 1, 2, 4, 6, 8, 10, 13, 16, 18, /** MBSFIR[9] * CACHE_SRW_CE */ - (rMBSFIR, bit(9)) ? TBDDefaultCallout; + (rMBSFIR, bit(9)) ? l4_cache_srw_ce; /** MBSFIR[10] * CACHE_SRW_UE */ - (rMBSFIR, bit(10)) ? TBDDefaultCallout; + (rMBSFIR, bit(10)) ? l4_cache_srw_ue_UERE; /** MBSFIR[11] * CACHE_SRW_SUE @@ -1249,12 +1249,12 @@ group gMBSFIR filter singlebit, cs_root_cause( 0, 1, 2, 4, 6, 8, 10, 13, 16, 18, /** MBSFIR[12] * CACHE_CO_CE */ - (rMBSFIR, bit(12)) ? 
TBDDefaultCallout; + (rMBSFIR, bit(12)) ? l4_cache_co_ce; /** MBSFIR[13] * CACHE_CO_UE */ - (rMBSFIR, bit(13)) ? TBDDefaultCallout; + (rMBSFIR, bit(13)) ? l4_cache_co_ue_UERE; /** MBSFIR[14] * CACHE_CO_SUE diff --git a/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule b/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule new file mode 100644 index 000000000..9ee085a9b --- /dev/null +++ b/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule @@ -0,0 +1,68 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2018 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# IBM_PROLOG_END_TAG + +/** Callout the connected L4 */ +actionclass calloutL4 { callout(connected(TYPE_L4), MRU_MED); }; + +/** L4 cache SRW CE */ +actionclass l4_cache_srw_ce +{ + calloutL4; + threshold( field(32 / day), mfg_file(ATTR_MNFG_TH_CEN_L4_CACHE_CES)); + funccall("CaptureL4CacheErr"); + funccall("ClearServiceCallFlag"); + funccall("ClearMbsSecondaryBits"); +}; + +/** L4 cache SRW UE */ +actionclass l4_cache_srw_ue_UERE +{ + calloutL4; + threshold1; + funccall("CaptureL4CacheErr"); + funccall("MaskMbsSecondaryBits"); + SueSource; +}; + +/** L4 cache CO CE */ +actionclass l4_cache_co_ce +{ + calloutL4; + threshold( field(32 / day), mfg_file(ATTR_MNFG_TH_CEN_L4_CACHE_CES)); + funccall("CaptureL4CacheErr"); + funccall("ClearServiceCallFlag"); + funccall("ClearMbaCalSecondaryBits"); +}; + +/** L4 cache CO UE */ +actionclass l4_cache_co_ue_UERE +{ + calloutL4; + threshold1; + funccall("CaptureL4CacheErr"); + funccall("MaskMbaCalSecondaryBits"); + SueSource; +}; + diff --git a/src/usr/diag/prdf/common/plat/cen/cen_centaur_regs.rule b/src/usr/diag/prdf/common/plat/cen/cen_centaur_regs.rule index 0fa0f5557..2689a439b 100644 --- a/src/usr/diag/prdf/common/plat/cen/cen_centaur_regs.rule +++ b/src/usr/diag/prdf/common/plat/cen/cen_centaur_regs.rule @@ -39,6 +39,14 @@ # Centaur chip MBSFIR ############################################################################ + register MBSFIR_AND + { + name "Centaur chip MBSFIR atomic AND"; + scomaddr 0x02011401; + capture group never; + access write_only; + }; + register MBSFIR_MASK_OR { name "Centaur chip MBSFIR MASK atomic OR"; @@ -123,4 +131,14 @@ capture group MaintCmdRegs_mba1; }; + ############################################################################ + # L4 cache address trap + ############################################################################ + + register MBCELOG + { + name "MBU.MBS.MBCELOGQ"; + scomaddr 0x02011416; + capture group L4CacheErr; + }; diff --git
a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule index e9a24513f..33c9fb1d3 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule @@ -638,12 +638,12 @@ group gMbsFir filter singlebit, /** MBSFIR[9] * MBS_FIR_REG_CACHE_SRW_CE */ - (MbsFir, bit(9)) ? clearSecMbsBitsAndLineDelete; + (MbsFir, bit(9)) ? l4_cache_srw_ce; /** MBSFIR[10] * MBS_FIR_REG_CACHE_SRW_UE */ - (MbsFir, bit(10)) ? maskSecMbsBitsAndConnL4UE; + (MbsFir, bit(10)) ? l4_cache_srw_ue_UERE; /** MBSFIR[11] * MBS_FIR_REG_CACHE_SRW_SUE @@ -653,12 +653,12 @@ group gMbsFir filter singlebit, /** MBSFIR[12] * MBS_FIR_REG_CACHE_CO_CE */ - (MbsFir, bit(12)) ? clearSecMbaCalBitsAndLineDelete; + (MbsFir, bit(12)) ? l4_cache_co_ce; /** MBSFIR[13] * MBS_FIR_REG_CACHE_CO_UE */ - (MbsFir, bit(13)) ? maskSecMbaCalBitsAndConnL4UE; + (MbsFir, bit(13)) ? l4_cache_co_ue_UERE; /** MBSFIR[14] * MBS_FIR_REG_CACHE_CO_SUE diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule index 56db89619..090168456 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule @@ -199,14 +199,6 @@ capture group FirRegs; }; - register MBSFIR_AND - { - name "MBU.MBS.MBS_FIR_REG AND"; - scomaddr 0x02011401; - capture group never; - access write_only; - }; - register MBSFIR_MASK { name "MBU.MBS.MBS_FIR_MASK_REG"; @@ -937,14 +929,3 @@ capture group never; }; - ############################################################################ - # L4 cache trapped address. - # This is FFDC only register. 
- ############################################################################ - - register MBCELOG - { - name "MBU.MBS.MBCELOGQ"; - scomaddr 0x02011416; - capture group L4CacheErr; - }; diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C index a47acd8be..97e63249e 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -731,239 +731,6 @@ PLUGIN_FETCH_UE_ERROR( 1 ) //------------------------------------------------------------------------------ -/** - * @fn ClearMbsSecondaryBits - * @brief Clears MBS secondary Fir bits which may come up because of primary - * MBS/MBI FIR bits. - * @param i_chip The Centaur chip. - * @param i_sc ServiceDataColector. - * @return SUCCESS. - */ -int32_t ClearMbsSecondaryBits( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - #define PRDF_FUNC "[ClearMbsSecondaryBits] " - - int32_t l_rc = SUCCESS; - do - { - SCAN_COMM_REGISTER_CLASS * mbsFir = i_chip->getRegister("MBSFIR"); - SCAN_COMM_REGISTER_CLASS * mbsFirMask = - i_chip->getRegister("MBSFIR_MASK"); - SCAN_COMM_REGISTER_CLASS * mbsFirAnd = - i_chip->getRegister("MBSFIR_AND"); - l_rc = mbsFir->Read(); - l_rc |= mbsFirMask->Read(); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "MBSFIR/MBSFIR_MASK read failed" - "for 0x%08x", i_chip->GetId()); - break; - } - - mbsFirAnd->setAllBits(); - - if ( mbsFir->IsBitSet(26) - && mbsFir->IsBitSet(9) && ( ! 
mbsFirMask->IsBitSet(9))) - { - mbsFirAnd->ClearBit(26); - } - - if( mbsFir->IsBitSet(3) || mbsFir->IsBitSet(4) ) - { - SCAN_COMM_REGISTER_CLASS * mbiFir = i_chip->getRegister("MBIFIR"); - SCAN_COMM_REGISTER_CLASS * mbiFirMask = - i_chip->getRegister("MBIFIR_MASK"); - l_rc = mbiFir->Read(); - l_rc |= mbiFirMask->Read(); - if ( SUCCESS != l_rc ) - { - // Do not break from here, just print error trace. - // If there are other secondary bits ( e.g. 26, 27 ), - // we want to clear them. - PRDF_ERR( PRDF_FUNC "MBIFIR/MASK read failed" - "for 0x%08x", i_chip->GetId()); - } - else if ( mbiFir->IsBitSet( 0 ) && ( ! mbiFirMask->IsBitSet( 0 )) ) - { - mbsFirAnd->ClearBit(3); - mbsFirAnd->ClearBit(4); - } - } - - l_rc = mbsFirAnd->Write(); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "MBSFIR_AND write failed" - "for 0x%08x", i_chip->GetId()); - break; - } - - }while( 0 ); - return SUCCESS; - - #undef PRDF_FUNC -} PRDF_PLUGIN_DEFINE( Membuf, ClearMbsSecondaryBits ); - -//------------------------------------------------------------------------------ - -/** - * @fn ClearMbaCalSecondaryBits - * @brief Clears MBACAL secondary Fir bits which may come up because of MBSFIR - * @param i_chip The Centaur chip. - * @param i_sc ServiceDataColector. - * @return SUCCESS. 
- - */ -int32_t ClearMbaCalSecondaryBits( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - #define PRDF_FUNC "[ClearMbaCalSecondaryBits ] " - int32_t l_rc = SUCCESS; - - do - { - SCAN_COMM_REGISTER_CLASS * mbsFir = i_chip->getRegister("MBSFIR"); - SCAN_COMM_REGISTER_CLASS * mbsFirMask = - i_chip->getRegister("MBSFIR_MASK"); - l_rc = mbsFir->Read(); - l_rc |= mbsFirMask->Read(); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "MBSFIR/MBSFIR_MASK read failed" - "for 0x%08x", i_chip->GetId()); - break; - } - - CenMembufDataBundle * membdb = getMembufDataBundle( i_chip ); - - for( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ ) - { - ExtensibleChip * mbaChip = membdb->getMbaChip(i); - if ( NULL == mbaChip ) continue; - - SCAN_COMM_REGISTER_CLASS * mbaCalFir = - mbaChip->getRegister("MBACALFIR"); - - if( SUCCESS != mbaCalFir->Read() ) - { - // Do not break. Just print error trace and look for - // other MBA. - PRDF_ERR( PRDF_FUNC "MBACALFIR read failed" - "for 0x%08x", mbaChip->GetId()); - continue; - } - - if( !( mbaCalFir->IsBitSet( 10 ) || mbaCalFir->IsBitSet( 14 ) )) - continue; - - SCAN_COMM_REGISTER_CLASS * mbaCalAndFir = - mbaChip->getRegister("MBACALFIR_AND"); - - mbaCalAndFir->setAllBits(); - - mbaCalAndFir->ClearBit(10); - mbaCalAndFir->ClearBit(14); - - l_rc = mbaCalAndFir->Write(); - if ( SUCCESS != l_rc ) - { - // Do not break. Just print error trace and look for - // other MBA. - PRDF_ERR( PRDF_FUNC "MBACALFIR_AND write failed" - "for 0x%08x", mbaChip->GetId()); - } - } - - }while( 0 ); - - return SUCCESS; - #undef PRDF_FUNC - -} PRDF_PLUGIN_DEFINE( Membuf, ClearMbaCalSecondaryBits ); - -//------------------------------------------------------------------------------ - -/** - * @fn MaskMbsSecondaryBits - * @brief Mask MBS secondary Fir bits which may come up because of L4 UE. - * @param i_chip The Centaur chip. - * @param i_sc ServiceDataColector. - * @return SUCCESS. 
- */ -int32_t MaskMbsSecondaryBits( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - #define PRDF_FUNC "[MaskMbsSecondaryBits] " - - int32_t l_rc = SUCCESS; - do - { - SCAN_COMM_REGISTER_CLASS * mbsFirMaskOr = - i_chip->getRegister("MBSFIR_MASK_OR"); - mbsFirMaskOr->SetBit(27); - l_rc = mbsFirMaskOr->Write(); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "MBSFIR_MASK_OR write failed" - "for 0x%08x", i_chip->GetId()); - break; - } - - }while( 0 ); - - return SUCCESS; - #undef PRDF_FUNC - -} PRDF_PLUGIN_DEFINE( Membuf, MaskMbsSecondaryBits ); - -//------------------------------------------------------------------------------ - -/** - * @fn MaskMbaCalSecondaryBits - * @brief Mask MBACAL secondary Fir bits which may come up because of L4 UE. - * @param i_chip The Centaur chip. - * @param i_sc ServiceDataColector. - * @return SUCCESS. - */ -int32_t MaskMbaCalSecondaryBits( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - #define PRDF_FUNC "[MaskMbaCalSecondaryBits ] " - int32_t l_rc = SUCCESS; - - do - { - CenMembufDataBundle * membdb = getMembufDataBundle( i_chip ); - - for( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ ) - { - ExtensibleChip * mbaChip = membdb->getMbaChip(i); - if ( NULL == mbaChip ) continue; - - SCAN_COMM_REGISTER_CLASS * mbaCalFirMaskOr = - mbaChip->getRegister("MBACALFIR_MASK_OR"); - - mbaCalFirMaskOr->SetBit(9); - mbaCalFirMaskOr->SetBit(15); - l_rc = mbaCalFirMaskOr->Write(); - if ( SUCCESS != l_rc ) - { - // Do not break. Just print error trace and look for - // other MBA. - PRDF_ERR( PRDF_FUNC "MBACALFIR_MASK_OR write failed" - "for 0x%08x", mbaChip->GetId()); - } - } - }while( 0 ); - - return SUCCESS; - #undef PRDF_FUNC - -} PRDF_PLUGIN_DEFINE( Membuf, MaskMbaCalSecondaryBits ); - -//------------------------------------------------------------------------------ - /** * @brief Handles MCS Channel fail bits, if they exist. 
* @@ -1175,85 +942,6 @@ int32_t internalTimeout( ExtensibleChip * i_mbChip, //------------------------------------------------------------------------------ -/** - * @brief When not in MNFG mode, clear the service call flag so that - * thresholding will still be done, but no visible error log committed. - * @param i_chip Centaur chip - * @param i_sc Step code data struct - * @returns SUCCESS always - */ -int32_t ClearServiceCallFlag( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - if ( i_sc.service_data->IsAtThreshold() && !mfgMode() && - (CHECK_STOP != i_sc.service_data->getPrimaryAttnType()) && - (!i_sc.service_data->queryFlag(ServiceDataCollector::UNIT_CS)) ) - { - i_sc.service_data->clearServiceCall(); - } - - return SUCCESS; -} -PRDF_PLUGIN_DEFINE( Membuf, ClearServiceCallFlag ); - -//------------------------------------------------------------------------------ - -/** - * @brief Captures trapped address for L4 cache ECC errors. - * @param i_mbChip Centaur chip - * @param i_sc Step code data struct - * @returns SUCCESS always - * @note This function also reset ECC trapped address regsiters so that HW - * can capture address for next L4 ecc error. - */ -int32_t CaptureL4CacheErr( ExtensibleChip * i_mbChip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - #define PRDF_FUNC "[CaptureL4CacheErr] " - do - { - i_mbChip->CaptureErrorData( i_sc.service_data->GetCaptureData(), - Util::hashString( "L4CacheErr" ) ); - - // NOTE: FW should write on MBCELOG so that HW can capture - // address for next L4 CE error. - - // NOTE: Line delete feature for L4 cache may not be available during - // P8. But if it is incorporated in P8, we need to make sure following - // should be the order of events: - // 1. Capture group of registers associated with group L4CacheErr - // 2. do L4 line delete. - // 3. clear register MBCELOG - - // If we clear register MBCELOG before doing line delete, it is possible - // that hardware procedures shall run into erroneous scenarios. 
One - // probable order of events from PRDF's perspective which can cause - // this is below: - // 1. Receives an attention due to failure at cache address X. - // 2. captures all relevant register including MBCELOG. - // 3. cleares MBCELOG - i.e. failed address info is lost. HW populates - // this register with another L4 CE address say Y. - // 4. requestes HWP for line delete operation on address X but it - // actually deletes Y. It's because MBCELOG now contains address Y. - - SCAN_COMM_REGISTER_CLASS * mbcelogReg = - i_mbChip->getRegister("MBCELOG"); - mbcelogReg->clearAllBits(); - - if ( SUCCESS != mbcelogReg->Write() ) - { - PRDF_ERR( PRDF_FUNC "MBCELOG write failed for 0x%08x", - i_mbChip->GetId()); - break; - } - }while( 0 ); - - return SUCCESS; -} -PRDF_PLUGIN_DEFINE( Membuf, CaptureL4CacheErr ); - -//------------------------------------------------------------------------------ - /** * @brief Checks DD level. If DD1, implements the DD1 callout actions for * MBSFIR bit 30. -- cgit v1.2.1