diff options
Diffstat (limited to 'src/usr/diag')
155 files changed, 9747 insertions, 2630 deletions
diff --git a/src/usr/diag/attn/common/attnprd.C b/src/usr/diag/attn/common/attnprd.C index 65f2fafd9..e3f98335a 100644 --- a/src/usr/diag/attn/common/attnprd.C +++ b/src/usr/diag/attn/common/attnprd.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2016 */ +/* Contributors Listed Below - COPYRIGHT 2014,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -41,7 +41,6 @@ #include <errl/errlmanager.H> // Custom compile configs -#include <config.h> #if !defined(__HOSTBOOT_RUNTIME) && defined(CONFIG_ENABLE_CHECKSTOP_ANALYSIS) #include <prdf/prdfMain_ipl.H> diff --git a/src/usr/diag/attn/ipl/attn.C b/src/usr/diag/attn/ipl/attn.C index 7d59a3965..cd8762d49 100644 --- a/src/usr/diag/attn/ipl/attn.C +++ b/src/usr/diag/attn/ipl/attn.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2018 */ +/* Contributors Listed Below - COPYRIGHT 2014,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -44,7 +44,6 @@ #include <targeting/common/utilFilter.H> // Custom compile configs -#include <config.h> #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS #include "ipl/attnfilereg.H" diff --git a/src/usr/diag/attn/ipl/attnsvc.C b/src/usr/diag/attn/ipl/attnsvc.C index 17d87100f..0f4bddbb3 100644 --- a/src/usr/diag/attn/ipl/attnsvc.C +++ b/src/usr/diag/attn/ipl/attnsvc.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2018 */ +/* Contributors Listed Below - COPYRIGHT 2014,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -40,7 +40,6 @@ #include <initservice/initserviceif.H> // for hostboot TI // Custom compile configs -#include <config.h> using namespace std; using namespace PRDF; diff --git a/src/usr/diag/attn/ipl/attnsvc.H b/src/usr/diag/attn/ipl/attnsvc.H index 8e49fca1f..eabcb2176 100644 --- a/src/usr/diag/attn/ipl/attnsvc.H +++ b/src/usr/diag/attn/ipl/attnsvc.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2018 */ +/* Contributors Listed Below - COPYRIGHT 2014,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -36,7 +36,6 @@ #include "common/attnsvc_common.H" // Custom compile configs -#include <config.h> namespace ATTN { diff --git a/src/usr/diag/attn/runtime/attn_rt.C b/src/usr/diag/attn/runtime/attn_rt.C index 810e79bbe..92b225c7d 100644 --- a/src/usr/diag/attn/runtime/attn_rt.C +++ b/src/usr/diag/attn/runtime/attn_rt.C @@ -28,7 +28,7 @@ #include "common/attnmem.H" #include "common/attnbits.H" #include <runtime/interface.h> -#include <runtime/rt_targeting.H> +#include <targeting/runtime/rt_targeting.H> #include <targeting/common/target.H> #include <targeting/common/targetservice.H> #include <targeting/common/utilFilter.H> diff --git a/src/usr/diag/attn/runtime/test/attntestRtAttns.H b/src/usr/diag/attn/runtime/test/attntestRtAttns.H index 6e22b094e..b2bd1fd8e 100644 --- a/src/usr/diag/attn/runtime/test/attntestRtAttns.H +++ b/src/usr/diag/attn/runtime/test/attntestRtAttns.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2016 */ +/* Contributors Listed Below - COPYRIGHT 2014,2020 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -38,7 +38,7 @@ #include "../../common/attntrace.H" #include "../../common/attntarget.H" #include <runtime/interface.h> -#include <runtime/rt_targeting.H> +#include <targeting/runtime/rt_targeting.H> #include <targeting/common/targetservice.H> using namespace ATTN; @@ -79,8 +79,8 @@ class AttnCheckForRtAttentionsTest : public CxxTest::TestSuite } proc = procList[0]; - RT_TARG::rtChipId_t chipId = 0; - errlHndl_t err = RT_TARG::getRtTarget( proc, chipId ); + TARGETING::rtChipId_t chipId = 0; + errlHndl_t err = TARGETING::getRtTarget( proc, chipId ); if( NULL != err ) { TS_FAIL("getRtTarget() failed for 0x%08X", @@ -153,7 +153,7 @@ class AttnCheckForRtAttentionsTest : public CxxTest::TestSuite break; } - RT_TARG::rtChipId_t chipId = 0; + TARGETING::rtChipId_t chipId = 0; errlHndl_t err = RT_TARG::getRtTarget( proc, chipId ); if( NULL != err ) { diff --git a/src/usr/diag/mdia/makefile b/src/usr/diag/mdia/makefile index fff17dd96..c6279ee5c 100644 --- a/src/usr/diag/mdia/makefile +++ b/src/usr/diag/mdia/makefile @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2012,2017 +# Contributors Listed Below - COPYRIGHT 2012,2019 # [+] International Business Machines Corp. 
# # @@ -50,6 +50,11 @@ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/centaur/common/include EXTRAINCDIR += ${ROOTPATH}/src/import/chips/centaur/procedures/hwp/memory EXTRAINCDIR += ${ROOTPATH}/src/import/chips/centaur/procedures/hwp/memory/lib/shared +EXTRAINCDIR += ${ROOTPATH}/src/import/generic/memory/lib/prd/ +EXTRAINCDIR += ${ROOTPATH}/src/import/generic/memory/lib/utils/mcbist/ +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/ocmb/explorer/common/include/ +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/ocmb/explorer/procedures/hwp/memory/ + MODULE = mdia OBJS += mdiamonitor.o diff --git a/src/usr/diag/mdia/mdia.C b/src/usr/diag/mdia/mdia.C index a13f28e59..f75ca1b60 100644 --- a/src/usr/diag/mdia/mdia.C +++ b/src/usr/diag/mdia/mdia.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2018 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -115,14 +115,16 @@ errlHndl_t runStep(const TargetHandleList & i_targetList) // ensure threads and pools are shutdown when finished - doStepCleanup(globals); + if(nullptr == err) + { + err = doStepCleanup(globals); + } // If this step completes without the need for a reconfig due to an RCD // parity error, clear all RCD parity error counters. ATTR_RECONFIGURE_LOOP_type attr = top->getAttr<ATTR_RECONFIGURE_LOOP>(); if ( 0 == (attr & RECONFIGURE_LOOP_RCD_PARITY_ERROR) ) { - //TODO RTC 201293 - may need to update this for axone as well TargetHandleList trgtList; getAllChiplets( trgtList, TYPE_MCA ); for ( auto & trgt : trgtList ) { @@ -140,13 +142,14 @@ errlHndl_t runStep(const TargetHandleList & i_targetList) } -void doStepCleanup(const Globals & i_globals) +errlHndl_t doStepCleanup(const Globals & i_globals) { // stop the state machine - Singleton<StateMachine>::instance().shutdown(); + errlHndl_t l_errl = Singleton<StateMachine>::instance().shutdown(); // TODO ... 
stop the command monitor + return l_errl; } errlHndl_t processEvent(MaintCommandEvent & i_event) diff --git a/src/usr/diag/mdia/mdiafwd.H b/src/usr/diag/mdia/mdiafwd.H index e3395781e..eae069588 100644 --- a/src/usr/diag/mdia/mdiafwd.H +++ b/src/usr/diag/mdia/mdiafwd.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2018 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -211,8 +211,10 @@ errlHndl_t getWorkFlow( * @brief doStepCleanup shut down threads and pools on step exit * * @param[in] i_globals contains objects to be cleaned up + * + * @return nullptr on success; non-nullptr on error */ -void doStepCleanup(const Globals & i_globals); +errlHndl_t doStepCleanup(const Globals & i_globals); /** * @brief check if hw state has been changed for an mba diff --git a/src/usr/diag/mdia/mdiasm.C b/src/usr/diag/mdia/mdiasm.C index bb1c123cf..ba00de6b0 100644 --- a/src/usr/diag/mdia/mdiasm.C +++ b/src/usr/diag/mdia/mdiasm.C @@ -43,12 +43,12 @@ #include <errl/errludlogregister.H> #include <initservice/istepdispatcherif.H> #include <ipmi/ipmiwatchdog.H> -#include <config.h> #include <initservice/initserviceif.H> #include <sys/time.h> #include <p9c_mss_maint_cmds.H> #include <dimmBadDqBitmapFuncs.H> #include <sys/misc.h> +#include <hwp_wrappers.H> using namespace TARGETING; using namespace ERRORLOG; @@ -632,16 +632,17 @@ void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs) // target type is MCBIST else if ( TYPE_MCBIST == trgtType ) { + #ifndef CONFIG_AXONE fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiMcbist(target); - FAPI_INVOKE_HWP( err, mss::memdiags::stop, fapiMcbist ); + FAPI_INVOKE_HWP( err, nim_stop, fapiMcbist ); if ( nullptr != err ) { - MDIA_ERR("sm: mss::memdiags::stop failed"); + MDIA_ERR("sm: nim_stop failed"); errlCommit(err, MDIA_COMP_ID); } - //mss::memdiags::stop will set the command complete attention so + 
//nim_stop will set the command complete attention so //we need to clear those bitMask = ~bitMask; @@ -654,22 +655,23 @@ void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs) "0x%08X", firAddr, get_huid(target) ); errlCommit(err, MDIA_COMP_ID); } + #endif } // target type is OCMB_CHIP else if ( TYPE_OCMB_CHIP == trgtType ) { - /* TODO RTC 201293 uncomment once we have hwp support + #ifdef CONFIG_AXONE fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiOcmb(target); - FAPI_INVOKE_HWP( err, mss::memdiags::stop, fapiOcmb ); + FAPI_INVOKE_HWP( err, exp_stop, fapiOcmb ); if ( nullptr != err ) { - MDIA_ERR("sm: mss::memdiags::stop failed"); + MDIA_ERR("sm: exp_stop failed"); errlCommit(err, MDIA_COMP_ID); } - // mss::memdiags::stop will set the command complete + // exp_stop will set the command complete // attention so we need to clear those bitMask = ~bitMask; @@ -682,7 +684,7 @@ void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs) "0x%08X", firAddr, get_huid(target) ); errlCommit(err, MDIA_COMP_ID); } - */ + #endif } // Assert if unsupported type else @@ -782,7 +784,15 @@ void StateMachine::setup(const WorkFlowAssocMap & i_list) p->timeoutCnt = 0; p->data = NULL; - p->chipUnit = it->first->getAttr<ATTR_CHIP_UNIT>(); + if ( TYPE_OCMB_CHIP == it->first->getAttr<ATTR_TYPE>() ) + { + // There is no chip unit attribute for OCMBs, so just use 0 + p->chipUnit = 0; + } + else + { + p->chipUnit = it->first->getAttr<ATTR_CHIP_UNIT>(); + } iv_workFlowProperties.push_back(p); } @@ -1242,14 +1252,15 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) //target type is MCBIST else if (TYPE_MCBIST == trgtType) { + #ifndef CONFIG_AXONE fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiMcbist(target); - mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<mss::mc_type::NIMBUS> stopCond; switch(workItem) { case START_RANDOM_PATTERN: - FAPI_INVOKE_HWP( err, mss::memdiags::sf_init, fapiMcbist, + FAPI_INVOKE_HWP( err, 
nim_sf_init, fapiMcbist, mss::mcbist::PATTERN_RANDOM ); MDIA_FAST("sm: random init %p on: %x", fapiMcbist, get_huid(target)); @@ -1270,7 +1281,7 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) stopCond.set_pause_on_nce_hard(mss::ON); } - FAPI_INVOKE_HWP( err, mss::memdiags::sf_read, fapiMcbist, + FAPI_INVOKE_HWP( err, nim_sf_read, fapiMcbist, stopCond ); MDIA_FAST("sm: scrub %p on: %x", fapiMcbist, get_huid(target)); @@ -1285,7 +1296,7 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) case START_PATTERN_6: case START_PATTERN_7: - FAPI_INVOKE_HWP( err, mss::memdiags::sf_init, fapiMcbist, + FAPI_INVOKE_HWP( err, nim_sf_init, fapiMcbist, workItem ); MDIA_FAST("sm: init %p on: %x", fapiMcbist, get_huid(target)); @@ -1301,19 +1312,20 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) MDIA_FAST("sm: Running Maint Cmd failed"); i_wfp.data = nullptr; } + #endif } // target type is OCMB_CHIP else if ( TYPE_OCMB_CHIP == trgtType ) { - /* TODO RTC 201293 - uncomment with hwp support + #ifdef CONFIG_AXONE fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiOcmb(target); - mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; switch(workItem) { case START_RANDOM_PATTERN: - FAPI_INVOKE_HWP( err, mss::memdiags::sf_init, fapiOcmb, + FAPI_INVOKE_HWP( err, exp_sf_init, fapiOcmb, mss::mcbist::PATTERN_RANDOM ); MDIA_FAST("sm: random init %p on: %x", fapiOcmb, get_huid(target)); @@ -1334,7 +1346,7 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) stopCond.set_pause_on_nce_hard(mss::ON); } - FAPI_INVOKE_HWP( err, mss::memdiags::sf_read, fapiOcmb, + FAPI_INVOKE_HWP( err, exp_sf_read, fapiOcmb, stopCond ); MDIA_FAST( "sm: scrub %p on: %x", fapiOcmb, get_huid(target) ); @@ -1349,7 +1361,7 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) case START_PATTERN_6: case START_PATTERN_7: - FAPI_INVOKE_HWP( err, mss::memdiags::sf_init, fapiOcmb, + 
FAPI_INVOKE_HWP( err, exp_sf_init, fapiOcmb, workItem ); MDIA_FAST( "sm: init %p on: %x", fapiOcmb, get_huid(target) ); @@ -1365,7 +1377,7 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) MDIA_FAST("sm: Running Maint Cmd failed"); i_wfp.data = nullptr; } - */ + #endif } else { @@ -1571,37 +1583,39 @@ bool StateMachine::processMaintCommandEvent(const MaintCommandEvent & i_event) //target type is MCBIST else if ( TYPE_MCBIST == trgtType ) { + #ifndef CONFIG_AXONE if(flags & STOP_CMD) { MDIA_FAST("sm: stopping command: %p", target); fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiMcbist(target); - FAPI_INVOKE_HWP( err, mss::memdiags::stop, fapiMcbist ); + FAPI_INVOKE_HWP( err, nim_stop, fapiMcbist ); if(nullptr != err) { - MDIA_ERR("sm: mss::memdiags::stop failed"); + MDIA_ERR("sm: nim_stop failed"); errlCommit(err, MDIA_COMP_ID); } } + #endif } // target type is OCMB_CHIP else if ( TYPE_OCMB_CHIP == trgtType ) { + #ifdef CONFIG_AXONE if(flags & STOP_CMD) { MDIA_FAST("sm: stopping command: %p", target); - /* TODO RTC 201293 - reenable with hwp support fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiOcmb(target); - FAPI_INVOKE_HWP( err, mss::memdiags::stop, fapiOcmb ); + FAPI_INVOKE_HWP( err, exp_stop, fapiOcmb ); if(nullptr != err) { - MDIA_ERR("sm: mss::memdiags::stop failed"); + MDIA_ERR("sm: exp_stop failed"); errlCommit(err, MDIA_COMP_ID); } - */ } + #endif } else { @@ -1665,10 +1679,12 @@ void StateMachine::reset() mutex_unlock(&iv_mutex); } -void StateMachine::shutdown() +errlHndl_t StateMachine::shutdown() { mutex_lock(&iv_mutex); + errlHndl_t l_errl = nullptr; + Util::ThreadPool<WorkItem> * tp = iv_tp; CommandMonitor * monitor = iv_monitor; @@ -1684,7 +1700,7 @@ void StateMachine::shutdown() if(tp) { MDIA_FAST("Stopping threadPool..."); - tp->shutdown(); + l_errl = tp->shutdown(); delete tp; } @@ -1696,11 +1712,16 @@ void StateMachine::shutdown() } MDIA_FAST("sm: ...shutdown complete"); + return l_errl; } StateMachine::~StateMachine() { - 
shutdown(); + errlHndl_t l_errl = shutdown(); + if(l_errl) + { + errlCommit(l_errl, MDIA_COMP_ID); + } sync_cond_destroy(&iv_cond); mutex_destroy(&iv_mutex); diff --git a/src/usr/diag/mdia/mdiasm.H b/src/usr/diag/mdia/mdiasm.H index 924af567a..55cc8ed74 100644 --- a/src/usr/diag/mdia/mdiasm.H +++ b/src/usr/diag/mdia/mdiasm.H @@ -106,8 +106,10 @@ class StateMachine /** * @brief shutdown state machine + * + * @retval nullptr on success; non-nullptr on error */ - void shutdown(); + errlHndl_t shutdown(); /** * @brief processMaintCommandEvent process maint command event from prd diff --git a/src/usr/diag/mdia/test/mdiatestmba.H b/src/usr/diag/mdia/test/mdiatestmba.H index bca85c11d..97360c80a 100644 --- a/src/usr/diag/mdia/test/mdiatestmba.H +++ b/src/usr/diag/mdia/test/mdiatestmba.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2016 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -47,33 +47,43 @@ class MdiaMbaTest : public CxxTest::TestSuite using namespace MDIA; using namespace TARGETING; - TS_TRACE(ENTER_MRK "testGetDiagnosticMode"); + TS_TRACE( ENTER_MRK "testGetDiagnosticMode" ); - TargetHandleList mbaList; - getAllChiplets(mbaList, TYPE_MBA); + TargetHandleList list; + fapi2::TargetType type = getMdiaTargetType(); + if ( fapi2::TARGET_TYPE_MBA_CHIPLET == type ) + { + TARGETING::getAllChiplets( list, TYPE_MBA ); + } + else if ( fapi2::TARGET_TYPE_MCBIST == type ) + { + TARGETING::getAllChiplets( list, TYPE_MCBIST ); + } + else if ( fapi2::TARGET_TYPE_OCMB_CHIP == type ) + { + TARGETING::getAllChiplets( list, TYPE_OCMB_CHIP ); + } - if( !mbaList.empty() ) + if( !list.empty() ) { DiagMode mode; Globals globals; - TargetHandle_t mba = mbaList[0]; + TargetHandle_t trgt = list[0]; - errlHndl_t err = getDiagnosticMode( - globals, mba, mode); + errlHndl_t err = getDiagnosticMode( globals, trgt, mode ); if(err) { - TS_FAIL("getDiagnosticMode failed " - "unexpectedly"); + TS_FAIL( "getDiagnosticMode failed unexpectedly" ); } if(mode != ONE_PATTERN) { - TS_FAIL("mode != ONE_PATTERN"); + TS_FAIL( "mode != ONE_PATTERN" ); } } - TS_TRACE(EXIT_MRK "testGetDiagnosticMode"); + TS_TRACE( EXIT_MRK "testGetDiagnosticMode" ); } void testGetWorkFlow(void) @@ -81,63 +91,74 @@ class MdiaMbaTest : public CxxTest::TestSuite using namespace MDIA; using namespace TARGETING; - TS_TRACE(ENTER_MRK "testGetWorkFlow"); + TS_TRACE( ENTER_MRK "testGetWorkFlow" ); Globals globals; - TargetHandle_t mba = 0; + TargetHandle_t trgt = 0; DiagMode mode; errlHndl_t err = NULL; - TargetHandleList mbaList; - getAllChiplets(mbaList, TYPE_MBA); - if( !mbaList.empty() ) + TargetHandleList list; + fapi2::TargetType type = getMdiaTargetType(); + if ( fapi2::TARGET_TYPE_MBA_CHIPLET == type ) { - mba = mbaList[0]; - err = getDiagnosticMode( - globals, mba, mode); + TARGETING::getAllChiplets( list, TYPE_MBA ); + } + else if ( fapi2::TARGET_TYPE_MCBIST == type ) + { + 
TARGETING::getAllChiplets( list, TYPE_MCBIST ); + } + else if ( fapi2::TARGET_TYPE_OCMB_CHIP == type ) + { + TARGETING::getAllChiplets( list, TYPE_OCMB_CHIP ); + } - if(err) + if( !list.empty() ) + { + trgt = list[0]; + err = getDiagnosticMode( globals, trgt, mode ); + + if( err ) { - TS_FAIL("getDiagnosticMode " - "failed unexpectedly"); + TS_FAIL( "getDiagnosticMode failed unexpectedly" ); } - if(mode != ONE_PATTERN) + if( mode != ONE_PATTERN ) { - TS_FAIL("mode != ONE_PATTERN"); + TS_FAIL( "mode != ONE_PATTERN" ); } - } - WorkFlow wf, expected; + WorkFlow wf, expected; - expected.push_back(RESTORE_DRAM_REPAIRS); - expected.push_back(START_PATTERN_0); - expected.push_back(START_SCRUB); - expected.push_back(CLEAR_HW_CHANGED_STATE); + expected.push_back( RESTORE_DRAM_REPAIRS ); + expected.push_back( START_PATTERN_0 ); + expected.push_back( START_SCRUB ); + expected.push_back( CLEAR_HW_CHANGED_STATE ); - err = getWorkFlow(mode, wf, globals); + err = getWorkFlow( mode, wf, globals ); - if(err) - { - TS_FAIL("getWorkFlow failed unexpectedly"); - } + if( err ) + { + TS_FAIL( "getWorkFlow failed unexpectedly" ); + } - if(wf.size() != expected.size()) - { - TS_FAIL("incorrect workflow size for init only mode"); - } + if( wf.size() != expected.size() ) + { + TS_FAIL( "incorrect workflow size for init only mode" ); + } - int64_t index = wf.size(); + int64_t index = wf.size(); - while(index-- != 0) - { - if(wf[index] != expected[index]) + while( index-- != 0 ) { - TS_FAIL("workflow entry incorrect or out of order"); + if( wf[index] != expected[index] ) + { + TS_FAIL( "workflow entry incorrect or out of order" ); + } } } - TS_TRACE(EXIT_MRK "testGetWorkFlow"); + TS_TRACE( EXIT_MRK "testGetWorkFlow" ); } }; #endif diff --git a/src/usr/diag/prdf/common/framework/config/iipSystem.h b/src/usr/diag/prdf/common/framework/config/iipSystem.h index ef4ed9322..b1b6ad1f4 100755 --- a/src/usr/diag/prdf/common/framework/config/iipSystem.h +++ 
b/src/usr/diag/prdf/common/framework/config/iipSystem.h @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 1996,2018 */ +/* Contributors Listed Below - COPYRIGHT 1996,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -87,9 +87,7 @@ #include <vector> #include <map> -#ifndef IIPCONST_H #include <iipconst.h> //TARGETING::TargetHandle_t, DOMAIN_ID_TYPE -#endif #include <iipsdbug.h> // Include file for ATTENTION_TYPE diff --git a/src/usr/diag/prdf/common/framework/register/iipCaptureData.h b/src/usr/diag/prdf/common/framework/register/iipCaptureData.h index e65e94d3f..9aae2880c 100755 --- a/src/usr/diag/prdf/common/framework/register/iipCaptureData.h +++ b/src/usr/diag/prdf/common/framework/register/iipCaptureData.h @@ -78,9 +78,7 @@ #include <list> -#ifndef IIPCONST_H #include <iipconst.h> -#endif #include <prdfPlatServices.H> #include <functional> // @jl04 a Needed for the unary function in new predicate. diff --git a/src/usr/diag/prdf/common/framework/register/iipErrorRegisterMask.h b/src/usr/diag/prdf/common/framework/register/iipErrorRegisterMask.h index fb1443df8..af67c68aa 100755 --- a/src/usr/diag/prdf/common/framework/register/iipErrorRegisterMask.h +++ b/src/usr/diag/prdf/common/framework/register/iipErrorRegisterMask.h @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -70,9 +70,7 @@ #include <iipErrorRegisterFilter.h> #endif -#ifndef IIPBITS_H -#include <iipbits.h> -#endif +#include <prdfBitString.H> namespace PRDF { diff --git a/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccess.h b/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccess.h deleted file mode 100755 index 1e7ad5947..000000000 --- a/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccess.h +++ /dev/null @@ -1,184 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/common/framework/register/iipMopRegisterAccess.h $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ -/* [+] International Business Machines Corp. */ -/* */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/* */ -/* IBM_PROLOG_END_TAG */ - -#ifndef iipMopRegisterAccess_h -#define iipMopRegisterAccess_h - -// Class Specification ************************************************* -// -// Class name: MopRegisterAccess -// Parent class: None. -// -// Summary: This class provides access to hardware register via -// a MOP routine. A single pure virtual function Access() -// is declared for this purpose. -// -// Cardinality: 0 -// -// Performance/Implementation: -// Space Complexity: Constant -// Time Complexity: All member functions constant unless otherwise -// stated. 
-// -// Usage Examples: -// -// -// void foo(MopRegisterAccess & mra) -// { -// BitStringBuffer bitString(80); // 80 bits -// -// mra.Access(bitString, READ); -// ... -// -// } -// -// -// End Class Specification ********************************************* - -// Includes -#if !defined(IIPCONST_H) -#include <iipconst.h> -#endif -#include <prdfPlatServices.H> - -namespace PRDF -{ -// Forward References -class BitString; - -class MopRegisterAccess -{ -public: - - enum Operation - { - READ = 0, - WRITE = 1 - }; - - // MopRegisterAccess(void); - // Function Specification ******************************************** - // - // Purpose: Initialization - // Parameters: None. - // Returns: No value returned. - // Requirements: None. - // Promises: All data members are initialized. - // Exceptions: None. - // Concurrency: N/A - // Notes: This constructor is not declared. This compiler generated - // default definition is sufficient. - // - // End Function Specification ////////////////////////////////////// - - // MopRegisterAccess(const MopRegisterAccess & scr); - // Function Specification ******************************************** - // - // Purpose: Copy - // Parameters: scr: Reference to instance to copy - // Returns: No value returned. - // Requirements: None. - // Promises: All data members will be copied (Deep copy). - // Exceptions: None. - // Concurrency: N/A. - // Notes: This constructor is not declared. This compiler generated - // default definition is sufficient. - // - // End Function Specification **************************************** - - virtual ~MopRegisterAccess() {} - - // Function Specification ******************************************** - // - // Purpose: Destruction - // Parameters: None. - // Returns: No value returned - // Requirements: None. - // Promises: None. - // Exceptions: None. 
- // Concurrency: N/A - // - // End Function Specification **************************************** - - // MopRegisterAccess & operator=(const MopRegisterAccess & scr); - // Function Specification ******************************************** - // - // Purpose: Assigment - // Parameters: d: Reference to instance to assign from - // Returns: Reference to this instance - // Requirements: None. - // Promises: All data members are assigned to - // Exceptions: None. - // Concurrency: N/A. - // Notes: This assingment operator is not declared. The compiler - // generated default definition is sufficient. - // - // End Function Specification **************************************** - - virtual uint32_t Access(BitString & bs, - uint64_t registerId, - Operation operation) const = 0; - // Function Specification ******************************************** - // - // Purpose: This function reads or writes the hardware according - // to the specified operation. - // Parameters: bs: Bit string to retrieve(for write) or store data - // (from read) - // registerId: SCR Address or scan offset - // operation: Indicates either read or write operation - // Returns: Hardware OPs return code - // Requirements: bs.Length() == long enough - // Promises: For read operation, bs is modified to reflect hardware - // register state - // Exceptions: None. - // Concurrency: Nonreentrant. 
- // Note: The first bs.Length() bits from the Hardware OPs read - // are set/reset in bs (from left to right) - // For a write, the first bs.Length() bits are written - // to the hardware register with right padded 0's if - // needed - // - // End Function Specification **************************************** - //Get Ids and count - virtual const TARGETING::TargetHandle_t * GetChipIds(int & count) const = 0; - // Function Specification ******************************************** - // - // Purpose: Access Chip Ids and # of chips to access - // Parameters: count: Var to return chip count of valid IDs - // Returns: ptr to Chip ids - // Requirements: None - // Promises: None - // Exceptions: None. - // Concurrency: Reentrant. - // - // End Function Specification **************************************** - - private: - - }; - -} // end namespace PRDF - -#endif diff --git a/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.h b/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.h deleted file mode 100755 index e87d70210..000000000 --- a/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.h +++ /dev/null @@ -1,158 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.h $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* Contributors Listed Below - COPYRIGHT 1996,2017 */ -/* [+] International Business Machines Corp. */ -/* */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. 
*/ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/* */ -/* IBM_PROLOG_END_TAG */ - -#ifndef iipMopRegisterAccessScanComm_h -#define iipMopRegisterAccessScanComm_h - -// Class Specification ************************************************* -// -// Class name: MopRegisterAccessScanComm -// Parent class: MopRegisterAccess. -// -// Summary: This class provides access to hardware register data via -// a MOP Scan Comm routine. -// -// Cardinality: 0 -// -// Performance/Implementation: -// Space Complexity: Constant -// Time Complexity: All member functions constant unless otherwise -// stated. -// -// Usage Examples: -// -// -// -// End Class Specification ********************************************* - -// Includes - -#pragma interface - -#ifndef iipMopRegisterAccess_h -#include <iipMopRegisterAccess.h> -#endif - -namespace PRDF -{ - -// Forward References -class MopRegisterAccessScanComm : public MopRegisterAccess -{ -public: - - // Function Specification ******************************************** - // - // Purpose: CTOR - // Parameters: None - // Returns: No value returned. - // Requirements: None. - // Promises: All data members are initialized. - // Exceptions: None. 
- // Concurrency: N/A - // Note: Multiple chip IDs are for chips that MOPs must - // access at the same time when performing a Scan - // Comm operation (ie STINGER & ARROW chips) - // - // End Function Specification ////////////////////////////////////// - - // MopRegisterAccessScanComm(const MopRegisterAccessScanComm & scr); - // Function Specification ******************************************** - // - // Purpose: Copy - // Parameters: scr: Reference to instance to copy - // Returns: No value returned. - // Requirements: None. - // Promises: All data members will be copied (Deep copy). - // Exceptions: None. - // Concurrency: N/A. - // Notes: This constructor is not declared. This compiler generated - // default definition is sufficient. - // - // End Function Specification **************************************** - - // virtual ~MopRegisterAccessScanComm(void); - // Function Specification ******************************************** - // - // Purpose: Destruction - // Parameters: None. - // Returns: No value returned - // Requirements: None. - // Promises: None. - // Exceptions: None. - // Concurrency: N/A - // Notes: This destructor is not declared. This compiler generated - // default definition is sufficient. - // - // End Function Specification **************************************** - - // MopRegisterAccessScanComm & operator=(const MopRegisterAccessScanComm & scr); - // Function Specification ******************************************** - // - // Purpose: Assigment - // Parameters: d: Reference to instance to assign from - // Returns: Reference to this instance - // Requirements: None. - // Promises: All data members are assigned to - // Exceptions: None. - // Concurrency: N/A. - // Notes: This assingment operator is not declared. The compiler - // generated default definition is sufficient. 
- // - // End Function Specification **************************************** - - virtual uint32_t Access(BitString & bs, - uint32_t registerId, - Operation operation) const; - // Function Specification ******************************************** - // - // Purpose: This function reads or writes the hardware according - // to the specified operation. - // Parameters: bs: Bit string to retrieve(for write) or store data - // (from read) - // registerId: ScanComm register address - // operation: Indicates either read or write operation - // Returns: Hardware OPs return code - // Requirements: bs.Length() == long enough - // Promises: For read operation, bs is modified to reflect hardware - // register state - // Exceptions: None. - // Concurrency: Nonreentrant. - // Note: The first bs.Length() bits from the Hardware OPs read - // are set/reset in bs (from left to right) - // For a write, the first bs.Length() bits are written - // to the hardware register with right padded 0's if - // needed - // - // End Function Specification **************************************** - - -private: // DATA - -}; - -} // end namespace PRDF - -#endif diff --git a/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.inl b/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.inl deleted file mode 100755 index ad08084d6..000000000 --- a/src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.inl +++ /dev/null @@ -1,67 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/common/framework/register/iipMopRegisterAccessScanComm.inl $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* COPYRIGHT International Business Machines Corp. 1996,2014 */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. 
*/ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/* */ -/* IBM_PROLOG_END_TAG */ - -// Module Description ************************************************** -// -// Description: This module provides the inline implementation for the -// PRD MOP Register Access Scan Comm class. -// -// End Module Description ********************************************** - -namespace PRDF -{ - -//---------------------------------------------------------------------- -// Includes -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// User Types -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// Constants -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// Macros -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// Internal Function Prototypes -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// Global Variables -//---------------------------------------------------------------------- - -//--------------------------------------------------------------------- -// Member Function Specifications -//--------------------------------------------------------------------- - -inline 
-MopRegisterAccessScanComm::MopRegisterAccessScanComm(void) - { - } - -} // end namespace PRDF diff --git a/src/usr/diag/prdf/common/framework/register/iipscr.C b/src/usr/diag/prdf/common/framework/register/iipscr.C index d4d7017a2..6834c6415 100755 --- a/src/usr/diag/prdf/common/framework/register/iipscr.C +++ b/src/usr/diag/prdf/common/framework/register/iipscr.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 1997,2017 */ +/* Contributors Listed Below - COPYRIGHT 1997,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -41,7 +41,7 @@ /* Includes */ /*--------------------------------------------------------------------*/ -#include <iipbits.h> +#include <prdfBitString.H> #include <iipscr.h> #include <iipconst.h> diff --git a/src/usr/diag/prdf/common/framework/register/iipscr.h b/src/usr/diag/prdf/common/framework/register/iipscr.h index 53c9bfa5a..cd1243dc6 100755 --- a/src/usr/diag/prdf/common/framework/register/iipscr.h +++ b/src/usr/diag/prdf/common/framework/register/iipscr.h @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -43,7 +43,7 @@ // Includes //---------------------------------------------------------------------- -#include <iipbits.h> +#include <prdfBitString.H> #include <iipconst.h> #include <iipsdbug.h> #include <prdfMain.H> diff --git a/src/usr/diag/prdf/common/framework/register/prdfCaptureData.C b/src/usr/diag/prdf/common/framework/register/prdfCaptureData.C index 39113507b..5ddb11a4c 100755 --- a/src/usr/diag/prdf/common/framework/register/prdfCaptureData.C +++ b/src/usr/diag/prdf/common/framework/register/prdfCaptureData.C @@ -31,7 +31,7 @@ // Includes //---------------------------------------------------------------------- -#include <iipbits.h> +#include <prdfBitString.H> #include <prdfHomRegisterAccess.H> // dg06a #include <prdfScomRegister.H> #include <iipchip.h> diff --git a/src/usr/diag/prdf/common/framework/register/prdfErrorRegister.C b/src/usr/diag/prdf/common/framework/register/prdfErrorRegister.C index 3244022c7..450a7bc9c 100755 --- a/src/usr/diag/prdf/common/framework/register/prdfErrorRegister.C +++ b/src/usr/diag/prdf/common/framework/register/prdfErrorRegister.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2018 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -40,7 +40,7 @@ #include <prdfMain.H> #include <prdfAssert.h> #include <iipstep.h> -#include <iipbits.h> +#include <prdfBitString.H> #include <iipResolution.h> #include <iipscr.h> #include <prdfErrorSignature.H> diff --git a/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.C b/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.C index a9d2a615a..c7bf802a4 100755 --- a/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.C +++ b/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -30,11 +30,10 @@ //---------------------------------------------------------------------- // Includes //---------------------------------------------------------------------- -#define prdfHomRegisterAccess_C #include <prdfHomRegisterAccess.H> #include <prdf_service_codes.H> -#include <iipbits.h> +#include <prdfBitString.H> #include <prdfMain.H> #include <prdfPlatServices.H> #include <prdfGlobal.H> @@ -46,9 +45,6 @@ #include <p9_stop_api.H> #endif -#undef prdfHomRegisterAccess_C - - using namespace TARGETING; namespace PRDF @@ -117,7 +113,7 @@ void ScomService::setScomAccessor(ScomAccessor & i_ScomAccessor) uint32_t ScomService::Access(TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const + RegisterAccess::Operation operation) const { PRDF_DENTER("ScomService::Access()"); uint32_t rc = SUCCESS; @@ -136,7 +132,7 @@ uint32_t ScomService::Access(TargetHandle_t i_target, uint32_t ScomAccessor::Access(TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const + RegisterAccess::Operation operation) const { PRDF_DENTER("ScomAccessor::Access()"); @@ -146,7 +142,7 @@ uint32_t 
ScomAccessor::Access(TargetHandle_t i_target, { switch (operation) { - case MopRegisterAccess::WRITE: + case RegisterAccess::WRITE: { rc = PRDF::PlatServices::putScom(i_target, bs, registerId); @@ -198,7 +194,7 @@ uint32_t ScomAccessor::Access(TargetHandle_t i_target, break; } - case MopRegisterAccess::READ: + case RegisterAccess::READ: bs.clearAll(); // clear all bits rc = PRDF::PlatServices::getScom(i_target, bs, registerId); diff --git a/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.H b/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.H index 6426b4ac3..d26f173f6 100755 --- a/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.H +++ b/src/usr/diag/prdf/common/framework/register/prdfHomRegisterAccess.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -35,7 +35,6 @@ // Includes //-------------------------------------------------------------------- -#include <iipMopRegisterAccess.h> #include <vector> #include <prdfPlatServices.H> #include <prdfErrlUtil.H> @@ -47,6 +46,15 @@ namespace PRDF { +namespace RegisterAccess +{ + enum Operation + { + READ = 0, + WRITE = 1, + }; +} + class ScomAccessor { public: @@ -75,7 +83,7 @@ class ScomAccessor virtual uint32_t Access( TARGETING::TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const; + RegisterAccess::Operation operation) const; private: @@ -142,7 +150,7 @@ class ScomService virtual uint32_t Access(TARGETING::TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const; + RegisterAccess::Operation operation) const; private: diff --git a/src/usr/diag/prdf/common/framework/register/prdfRegisterCache.H b/src/usr/diag/prdf/common/framework/register/prdfRegisterCache.H index 
be34884a3..8d069b3cb 100644 --- a/src/usr/diag/prdf/common/framework/register/prdfRegisterCache.H +++ b/src/usr/diag/prdf/common/framework/register/prdfRegisterCache.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -29,7 +29,7 @@ /** @file prdfRegisterCache.H */ #include <map> -#include <iipbits.h> +#include <prdfBitString.H> #include <prdfGlobal.H> #include <prdfScanFacility.H> #include <prdfScomRegisterAccess.H> diff --git a/src/usr/diag/prdf/common/framework/register/prdfScomRegister.C b/src/usr/diag/prdf/common/framework/register/prdfScomRegister.C index 7e4cce81b..f8a445b20 100755 --- a/src/usr/diag/prdf/common/framework/register/prdfScomRegister.C +++ b/src/usr/diag/prdf/common/framework/register/prdfScomRegister.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -37,7 +37,7 @@ #include <iipchip.h> #include <prdfScomRegister.H> #include <iipconst.h> -#include <iipbits.h> +#include <prdfBitString.H> #include <prdfMain.H> #include <prdfRasServices.H> #include <prdfRegisterCache.H> @@ -155,7 +155,7 @@ uint32_t ScomRegister::ForceRead() const } // Read hardware. - o_rc = Access( readCache(), MopRegisterAccess::READ ); + o_rc = Access( readCache(), RegisterAccess::READ ); if ( SUCCESS != o_rc ) { // The read failed. Remove the entry from the cache so a subsequent @@ -201,7 +201,7 @@ uint32_t ScomRegister::Write() } // Write hardware. 
- o_rc = Access( readCache(), MopRegisterAccess::WRITE ); + o_rc = Access( readCache(), RegisterAccess::WRITE ); } while (0); @@ -213,7 +213,7 @@ uint32_t ScomRegister::Write() //------------------------------------------------------------------------------ uint32_t ScomRegister::Access( BitString & bs, - MopRegisterAccess::Operation op ) const + RegisterAccess::Operation op ) const { int32_t l_rc = SCR_ACCESS_FAILED; TARGETING::TargetHandle_t i_pchipTarget = getChip()->GetChipHandle(); diff --git a/src/usr/diag/prdf/common/framework/register/prdfScomRegister.H b/src/usr/diag/prdf/common/framework/register/prdfScomRegister.H index 655f4d523..e3d14a0dc 100755 --- a/src/usr/diag/prdf/common/framework/register/prdfScomRegister.H +++ b/src/usr/diag/prdf/common/framework/register/prdfScomRegister.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 1996,2017 */ +/* Contributors Listed Below - COPYRIGHT 1996,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -37,8 +37,8 @@ */ #include <iipscr.h> -#include <iipbits.h> -#include <iipMopRegisterAccess.h> +#include <prdfBitString.H> +#include <prdfHomRegisterAccess.H> #include <prdfTrace.H> namespace PRDF @@ -80,6 +80,9 @@ class ScomRegister : public SCAN_COMM_REGISTER_CLASS iv_operationType( ACCESS_NONE ) {} + /** @brief Destructor. 
*/ + virtual ~ScomRegister() = default; + /** * @brief Returns the pointer to bit string * @param i_type attention type @@ -201,7 +204,7 @@ class ScomRegister : public SCAN_COMM_REGISTER_CLASS * @return [SUCCESS|FAIL] */ uint32_t Access( BitString & bs, - MopRegisterAccess::Operation op )const; + RegisterAccess::Operation op )const; /** * @brief Returns rulechip pointer associated with the register diff --git a/src/usr/diag/prdf/common/framework/resolution/prdfCalloutMap.H b/src/usr/diag/prdf/common/framework/resolution/prdfCalloutMap.H index 7bedec637..269e432cf 100644 --- a/src/usr/diag/prdf/common/framework/resolution/prdfCalloutMap.H +++ b/src/usr/diag/prdf/common/framework/resolution/prdfCalloutMap.H @@ -195,7 +195,9 @@ PRDF_TARGET_TYPE_ALIAS( TYPE_MI, TARGETING::TYPE_MI ) PRDF_TARGET_TYPE_ALIAS( TYPE_DMI, TARGETING::TYPE_DMI ) PRDF_TARGET_TYPE_ALIAS( TYPE_MCC, TARGETING::TYPE_MCC ) PRDF_TARGET_TYPE_ALIAS( TYPE_OMIC, TARGETING::TYPE_OMIC ) +PRDF_TARGET_TYPE_ALIAS( TYPE_OMI, TARGETING::TYPE_OMI ) PRDF_TARGET_TYPE_ALIAS( TYPE_OCMB_CHIP, TARGETING::TYPE_OCMB_CHIP ) +PRDF_TARGET_TYPE_ALIAS( TYPE_MEM_PORT, TARGETING::TYPE_MEM_PORT ) PRDF_TARGET_TYPE_ALIAS( TYPE_MEMBUF, TARGETING::TYPE_MEMBUF ) PRDF_TARGET_TYPE_ALIAS( TYPE_L4, TARGETING::TYPE_L4 ) PRDF_TARGET_TYPE_ALIAS( TYPE_MBA, TARGETING::TYPE_MBA ) diff --git a/src/usr/diag/prdf/common/framework/resolution/prdfThresholdResolutions.H b/src/usr/diag/prdf/common/framework/resolution/prdfThresholdResolutions.H index e412460dc..b61699159 100755 --- a/src/usr/diag/prdf/common/framework/resolution/prdfThresholdResolutions.H +++ b/src/usr/diag/prdf/common/framework/resolution/prdfThresholdResolutions.H @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2003,2014 */ +/* Contributors Listed Below - COPYRIGHT 2003,2019 */ +/* [+] International Business Machines Corp. 
*/ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ @@ -100,10 +102,11 @@ class ThresholdResolution : public MaskResolution enum TimeBase { - ONE_SEC = 1, - ONE_MIN = ONE_SEC * 60, - ONE_HOUR = ONE_MIN * 60, - ONE_DAY = ONE_HOUR * 24, + ONE_SEC = 1, + ONE_MIN = ONE_SEC * 60, + ONE_HOUR = ONE_MIN * 60, + TEN_HOURS = ONE_HOUR * 10, + ONE_DAY = ONE_HOUR * 24, NONE = 0xffffffff, }; diff --git a/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h b/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h index 704dddf70..e8cdb79a5 100755 --- a/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h +++ b/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2018 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -628,6 +628,11 @@ public: void clearMruListGard(); /** + * @brief Iterates the MRU list and clears gard for any NVDIMM targets. + */ + void clearNvdimmMruListGard(); + + /** * @brief Iterates the MRU list and returns true if at least on target in * the list is set to be garded. * @return True if there is at least one target set to be garded. diff --git a/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C b/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C index d9681d66b..731102a26 100755 --- a/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C +++ b/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2015 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -177,6 +177,88 @@ void ServiceDataCollector::clearMruListGard() //------------------------------------------------------------------------------ +void ServiceDataCollector::clearNvdimmMruListGard() +{ + #define PRDF_FUNC "[ServiceDataCollector::clearNvdimmMruListGard] " + + #ifdef CONFIG_NVDIMM + #ifdef __HOSTBOOT_MODULE + // Loop through the MRU list. + for ( auto & mru : xMruList ) + { + PRDcallout callout = mru.callout; + PRDcalloutData::MruType mruType = callout.getType(); + + if ( mruType == PRDcalloutData::TYPE_TARGET ) + { + TargetHandle_t trgt = callout.getTarget(); + + // If the callout target is an NVDIMM send a message to + // PHYP/Hostboot that a save/restore may work, and if we are at + // IPL, clear Gard on the NVDIMM. + if ( TYPE_DIMM == PlatServices::getTargetType(trgt) && + isNVDIMM(trgt) ) + { + // Send the message to PHYP/Hostboot if a predictive log + if ( queryServiceCall() ) + { + uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( trgt, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) + { + PRDF_TRAC( PRDF_FUNC "nvdimmNotifyProtChange(0x%08x) " + "failed.", PlatServices::getHuid(trgt) ); + continue; + } + } + #ifndef __HOSTBOOT_RUNTIME + // IPL, clear Gard + mru.gardState = NO_GARD; + #endif + } + } + else if ( mruType == PRDcalloutData::TYPE_MEMMRU ) + { + MemoryMru memMru( callout.flatten() ); + TargetHandleList dimmList = memMru.getCalloutList(); + + for ( auto & dimm : dimmList ) + { + // If the callout target is an NVDIMM send a message to + // PHYP/Hostboot that a save/restore may work, and if we are at + // IPL, clear Gard on the NVDIMM. 
+ if ( TYPE_DIMM == PlatServices::getTargetType(dimm) && + isNVDIMM(dimm) ) + { + // Send the message to PHYP/Hostboot if a predictive log + if ( queryServiceCall() ) + { + uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( + dimm, NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) + { + PRDF_TRAC( PRDF_FUNC "nvdimmNotifyProtChange" + "(0x%08x) failed.", + PlatServices::getHuid(dimm) ); + continue; + } + } + #ifndef __HOSTBOOT_RUNTIME + // IPL, clear Gard + mru.gardState = NO_GARD; + #endif + } + } + } + } + #endif // __HOSTBOOT_MODULE + #endif // CONFIG_NVDIMM + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + bool ServiceDataCollector::isGardRequested() { bool gardRecordExit = false; diff --git a/src/usr/diag/prdf/common/iipconst.h b/src/usr/diag/prdf/common/iipconst.h index 07c5ded16..5c71cf8f7 100755 --- a/src/usr/diag/prdf/common/iipconst.h +++ b/src/usr/diag/prdf/common/iipconst.h @@ -65,12 +65,13 @@ namespace PRDF /* Constants */ /*--------------------------------------------------------------------*/ +// Return code constants #ifndef SUCCESS -#define SUCCESS 0 +static const int32_t SUCCESS = 0; #endif #ifndef FAIL -#define FAIL -1 +static const int32_t FAIL = -1; #endif enum DOMAIN_ID diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mc.rule b/src/usr/diag/prdf/common/plat/axone/axone_mc.rule index 4f63011fc..f23fee7d2 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mc.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -141,39 +141,39 @@ chip axone_mc }; ############################################################################ - # P9 MC target MCBISTFIR + # P9 MC target MCMISCFIR ############################################################################ - register MCBISTFIR + register MCMISCFIR { - name "P9 MC target MCBISTFIR"; + name "P9 MC target MCMISCFIR"; scomaddr 0x07012300; reset (&, 0x07012301); mask (|, 0x07012305); capture group default; }; - register MCBISTFIR_MASK + register MCMISCFIR_MASK { - name "P9 MC target MCBISTFIR MASK"; + name "P9 MC target MCMISCFIR MASK"; scomaddr 0x07012303; capture group default; }; - register MCBISTFIR_ACT0 + register MCMISCFIR_ACT0 { - name "P9 MC target MCBISTFIR ACT0"; + name "P9 MC target MCMISCFIR ACT0"; scomaddr 0x07012306; capture group default; - capture req nonzero("MCBISTFIR"); + capture req nonzero("MCMISCFIR"); }; - register MCBISTFIR_ACT1 + register MCMISCFIR_ACT1 { - name "P9 MC target MCBISTFIR ACT1"; + name "P9 MC target MCMISCFIR ACT1"; scomaddr 0x07012307; capture group default; - capture req nonzero("MCBISTFIR"); + capture req nonzero("MCMISCFIR"); }; # Include registers not defined by the xml @@ -253,9 +253,9 @@ group gMC_CHIPLET_FIR attntype CHECK_STOP, RECOVERABLE (rMC_CHIPLET_FIR, bit(11)) ? analyzeConnectedMCC3; /** MC_CHIPLET_FIR[12] - * Attention from MCBISTFIR + * Attention from MCMISCFIR */ - (rMC_CHIPLET_FIR, bit(12)) ? analyzeMCBISTFIR; + (rMC_CHIPLET_FIR, bit(12)) ? analyzeMCMISCFIR; /** MC_CHIPLET_FIR[13] * Attention from IOOMIFIR 0 @@ -358,9 +358,9 @@ group gMC_CHIPLET_UCS_FIR attntype UNIT_CS (rMC_CHIPLET_UCS_FIR, bit(8)) ? analyzeConnectedMCC3; /** MC_CHIPLET_UCS_FIR[9] - * Attention from MCBISTFIR + * Attention from MCMISCFIR */ - (rMC_CHIPLET_UCS_FIR, bit(9)) ? analyzeMCBISTFIR; + (rMC_CHIPLET_UCS_FIR, bit(9)) ? analyzeMCMISCFIR; /** MC_CHIPLET_UCS_FIR[10] * Attention from IOOMIFIR 0 @@ -448,9 +448,9 @@ group gMC_CHIPLET_HA_FIR attntype HOST_ATTN (rMC_CHIPLET_HA_FIR, bit(8)) ? 
analyzeConnectedMCC3; /** MC_CHIPLET_HA_FIR[9] - * Attention from MCBISTFIR + * Attention from MCMISCFIR */ - (rMC_CHIPLET_HA_FIR, bit(9)) ? analyzeMCBISTFIR; + (rMC_CHIPLET_HA_FIR, bit(9)) ? analyzeMCMISCFIR; }; @@ -563,94 +563,94 @@ group gMC_LFIR }; ################################################################################ -# P9 MC target MCBISTFIR +# P9 MC target MCMISCFIR ################################################################################ -rule rMCBISTFIR +rule rMCMISCFIR { CHECK_STOP: - MCBISTFIR & ~MCBISTFIR_MASK & ~MCBISTFIR_ACT0 & ~MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & ~MCMISCFIR_ACT0 & ~MCMISCFIR_ACT1; RECOVERABLE: - MCBISTFIR & ~MCBISTFIR_MASK & ~MCBISTFIR_ACT0 & MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & ~MCMISCFIR_ACT0 & MCMISCFIR_ACT1; HOST_ATTN: - MCBISTFIR & ~MCBISTFIR_MASK & MCBISTFIR_ACT0 & ~MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & MCMISCFIR_ACT0 & ~MCMISCFIR_ACT1; UNIT_CS: - MCBISTFIR & ~MCBISTFIR_MASK & MCBISTFIR_ACT0 & MCBISTFIR_ACT1; + MCMISCFIR & ~MCMISCFIR_MASK & MCMISCFIR_ACT0 & MCMISCFIR_ACT1; }; -group gMCBISTFIR +group gMCMISCFIR filter singlebit, cs_root_cause { - /** MCBISTFIR[0] + /** MCMISCFIR[0] * WAT debug bus attn */ - (rMCBISTFIR, bit(0)) ? defaultMaskedError; + (rMCMISCFIR, bit(0)) ? defaultMaskedError; - /** MCBISTFIR[1] + /** MCMISCFIR[1] * WAT debug register parity error */ - (rMCBISTFIR, bit(1)) ? defaultMaskedError; + (rMCMISCFIR, bit(1)) ? defaultMaskedError; - /** MCBISTFIR[2] + /** MCMISCFIR[2] * SCOM recoverable register parity error */ - (rMCBISTFIR, bit(2)) ? defaultMaskedError; + (rMCMISCFIR, bit(2)) ? self_th_1; - /** MCBISTFIR[3] + /** MCMISCFIR[3] * Spare */ - (rMCBISTFIR, bit(3)) ? defaultMaskedError; + (rMCMISCFIR, bit(3)) ? defaultMaskedError; - /** MCBISTFIR[4] + /** MCMISCFIR[4] * Chan 0A application interrupt */ - (rMCBISTFIR, bit(4)) ? defaultMaskedError; + (rMCMISCFIR, bit(4)) ? 
defaultMaskedError; - /** MCBISTFIR[5] + /** MCMISCFIR[5] * Chan 0B application interrupt */ - (rMCBISTFIR, bit(5)) ? defaultMaskedError; + (rMCMISCFIR, bit(5)) ? defaultMaskedError; - /** MCBISTFIR[6] + /** MCMISCFIR[6] * Chan 1A application interrupt */ - (rMCBISTFIR, bit(6)) ? defaultMaskedError; + (rMCMISCFIR, bit(6)) ? defaultMaskedError; - /** MCBISTFIR[7] + /** MCMISCFIR[7] * Chan 1B application interrupt */ - (rMCBISTFIR, bit(7)) ? defaultMaskedError; + (rMCMISCFIR, bit(7)) ? defaultMaskedError; - /** MCBISTFIR[8] + /** MCMISCFIR[8] * Chan 2A application interrupt */ - (rMCBISTFIR, bit(8)) ? defaultMaskedError; + (rMCMISCFIR, bit(8)) ? defaultMaskedError; - /** MCBISTFIR[9] + /** MCMISCFIR[9] * Chan 2B application interrupt */ - (rMCBISTFIR, bit(9)) ? defaultMaskedError; + (rMCMISCFIR, bit(9)) ? defaultMaskedError; - /** MCBISTFIR[10] + /** MCMISCFIR[10] * Chan 3A application interrupt */ - (rMCBISTFIR, bit(10)) ? defaultMaskedError; + (rMCMISCFIR, bit(10)) ? defaultMaskedError; - /** MCBISTFIR[11] + /** MCMISCFIR[11] * Chan 3B application interrupt */ - (rMCBISTFIR, bit(11)) ? defaultMaskedError; + (rMCMISCFIR, bit(11)) ? defaultMaskedError; - /** MCBISTFIR[12] + /** MCMISCFIR[12] * Internal SCOM error */ - (rMCBISTFIR, bit(12)) ? defaultMaskedError; + (rMCMISCFIR, bit(12)) ? defaultMaskedError; - /** MCBISTFIR[13] + /** MCMISCFIR[13] * Internal SCOM error clone */ - (rMCBISTFIR, bit(13)) ? defaultMaskedError; + (rMCMISCFIR, bit(13)) ? defaultMaskedError; }; diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule b/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule index aab2297ef..7c639bf5e 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2017,2018 +# Contributors Listed Below - COPYRIGHT 2017,2019 # [+] International Business Machines Corp. 
# # @@ -28,7 +28,7 @@ ############################################################################### actionclass analyzeMC_LFIR { analyze(gMC_LFIR); }; -actionclass analyzeMCBISTFIR { analyze(gMCBISTFIR); }; +actionclass analyzeMCMISCFIR { analyze(gMCMISCFIR); }; ############################################################################### # Analyze connected diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule new file mode 100644 index 000000000..150e6895a --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule @@ -0,0 +1,47 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/axone/axone_mc_regs.rule $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# IBM_PROLOG_END_TAG + +################################################################################ +# Additional registers for MC target, not defined in XML +################################################################################ + + ############################################################################ + # PCB Slave Error Regs + ############################################################################ + + register MC_ERROR_REG + { + name "MC PCB Slave error reg"; + scomaddr 0x070F001F; + capture group PllFIRs; + }; + + register MC_CONFIG_REG + { + name "MC PCB Slave config reg"; + scomaddr 0x070F001E; + capture group PllFIRs; + }; + diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule b/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule index bf632abbb..31f663c77 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mcc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -180,22 +180,22 @@ rule rDSTLFIR group gDSTLFIR filter singlebit, - cs_root_cause + cs_root_cause(0,4) { /** DSTLFIR[0] * AFU initiated Checkstop on Subchannel A */ - (rDSTLFIR, bit(0)) ? defaultMaskedError; + (rDSTLFIR, bit(0)) ? analyze_ocmb_chnl0_UERE; /** DSTLFIR[1] * AFU initiated Recoverable Attn on Subchannel A */ - (rDSTLFIR, bit(1)) ? defaultMaskedError; + (rDSTLFIR, bit(1)) ? analyze_ocmb_chnl0; /** DSTLFIR[2] * AFU initiated Special Attn on Subchannel A */ - (rDSTLFIR, bit(2)) ? defaultMaskedError; + (rDSTLFIR, bit(2)) ? analyze_ocmb_chnl0; /** DSTLFIR[3] * AFU initiated Application Interrupt Attn on Subchannel A @@ -205,17 +205,17 @@ group gDSTLFIR /** DSTLFIR[4] * AFU initiated Checkstop on Subchannel B */ - (rDSTLFIR, bit(4)) ? defaultMaskedError; + (rDSTLFIR, bit(4)) ? 
analyze_ocmb_chnl1_UERE; /** DSTLFIR[5] * AFU initiated Recoverable Attn on Subchannel B */ - (rDSTLFIR, bit(5)) ? defaultMaskedError; + (rDSTLFIR, bit(5)) ? analyze_ocmb_chnl1; /** DSTLFIR[6] * AFU initiated Special Attn on Subchannel B */ - (rDSTLFIR, bit(6)) ? defaultMaskedError; + (rDSTLFIR, bit(6)) ? analyze_ocmb_chnl1; /** DSTLFIR[7] * AFU initiated Application Interrupt Attn on Subchannel B @@ -225,52 +225,52 @@ group gDSTLFIR /** DSTLFIR[8] * Async crossing parity error */ - (rDSTLFIR, bit(8)) ? defaultMaskedError; + (rDSTLFIR, bit(8)) ? self_th_1; /** DSTLFIR[9] * Async crossing sequence error */ - (rDSTLFIR, bit(9)) ? defaultMaskedError; + (rDSTLFIR, bit(9)) ? self_th_1; /** DSTLFIR[10] * Config reg recoverable parity error */ - (rDSTLFIR, bit(10)) ? defaultMaskedError; + (rDSTLFIR, bit(10)) ? self_th_1; /** DSTLFIR[11] * Config reg fatal parity error */ - (rDSTLFIR, bit(11)) ? defaultMaskedError; + (rDSTLFIR, bit(11)) ? self_th_1; /** DSTLFIR[12] * Subchannel A counter error */ - (rDSTLFIR, bit(12)) ? defaultMaskedError; + (rDSTLFIR, bit(12)) ? chnl0_omi_bus_th_1; /** DSTLFIR[13] * Subchannel B counter error */ - (rDSTLFIR, bit(13)) ? defaultMaskedError; + (rDSTLFIR, bit(13)) ? chnl1_omi_bus_th_1; /** DSTLFIR[14] * Subchannel A timeout error */ - (rDSTLFIR, bit(14)) ? defaultMaskedError; + (rDSTLFIR, bit(14)) ? chnl0_omi_bus_th_32_perDay; /** DSTLFIR[15] * Subchannel B timeout error */ - (rDSTLFIR, bit(15)) ? defaultMaskedError; + (rDSTLFIR, bit(15)) ? chnl1_omi_bus_th_32_perDay; /** DSTLFIR[16] * Subchannel A buffer overuse error */ - (rDSTLFIR, bit(16)) ? defaultMaskedError; + (rDSTLFIR, bit(16)) ? chnl0_ocmb_th_1; /** DSTLFIR[17] * Subchannel B buffer overuse error */ - (rDSTLFIR, bit(17)) ? defaultMaskedError; + (rDSTLFIR, bit(17)) ? chnl1_ocmb_th_1; /** DSTLFIR[18] * Subchannel A DL link down @@ -293,14 +293,29 @@ group gDSTLFIR (rDSTLFIR, bit(21)) ? 
defaultMaskedError; /** DSTLFIR[22] - * Internal SCOM error + * DSTLFIR channel timeout on subch A */ - (rDSTLFIR, bit(22)) ? defaultMaskedError; + (rDSTLFIR, bit(22)) ? chnl0_omi_bus_th_1; /** DSTLFIR[23] - * Internal SCOM error clone + * DSTLFIR channel timeout on subch B + */ + (rDSTLFIR, bit(23)) ? chnl1_omi_bus_th_1; + + /** DSTLFIR[24:25] + * spare + */ + (rDSTLFIR, bit(24|25)) ? defaultMaskedError; + + /** DSTLFIR[26] + * Internal SCOM Error + */ + (rDSTLFIR, bit(26)) ? defaultMaskedError; + + /** DSTLFIR[27] + * Internal SCOM Error Clone */ - (rDSTLFIR, bit(23)) ? defaultMaskedError; + (rDSTLFIR, bit(27)) ? defaultMaskedError; }; @@ -327,22 +342,22 @@ group gUSTLFIR /** USTLFIR[0] * Chan A unexpected data error */ - (rUSTLFIR, bit(0)) ? defaultMaskedError; + (rUSTLFIR, bit(0)) ? chnl0_ocmb_th_1; /** USTLFIR[1] * Chan B unexpected data error */ - (rUSTLFIR, bit(1)) ? defaultMaskedError; + (rUSTLFIR, bit(1)) ? chnl1_ocmb_th_1; /** USTLFIR[2] * Chan A invalid template error */ - (rUSTLFIR, bit(2)) ? defaultMaskedError; + (rUSTLFIR, bit(2)) ? chnl0_ocmb_th_1; /** USTLFIR[3] * Chan B invalid template error */ - (rUSTLFIR, bit(3)) ? defaultMaskedError; + (rUSTLFIR, bit(3)) ? chnl1_ocmb_th_1; /** USTLFIR[4] * Chan A half speed mode @@ -357,12 +372,12 @@ group gUSTLFIR /** USTLFIR[6] * WDF buffer CE */ - (rUSTLFIR, bit(6)) ? defaultMaskedError; + (rUSTLFIR, bit(6)) ? self_th_32perDay; /** USTLFIR[7] * WDF buffer UE */ - (rUSTLFIR, bit(7)) ? defaultMaskedError; + (rUSTLFIR, bit(7)) ? self_th_1; /** USTLFIR[8] * WDF buffer SUE @@ -372,32 +387,32 @@ group gUSTLFIR /** USTLFIR[9] * WDF buffer overrun */ - (rUSTLFIR, bit(9)) ? defaultMaskedError; + (rUSTLFIR, bit(9)) ? self_th_1; /** USTLFIR[10] * WDF tag parity error */ - (rUSTLFIR, bit(10)) ? defaultMaskedError; + (rUSTLFIR, bit(10)) ? self_th_1; /** USTLFIR[11] * WDF scom sequencer error */ - (rUSTLFIR, bit(11)) ? defaultMaskedError; + (rUSTLFIR, bit(11)) ? 
self_th_1; /** USTLFIR[12] * WDF pwctl sequencer error */ - (rUSTLFIR, bit(12)) ? defaultMaskedError; + (rUSTLFIR, bit(12)) ? self_th_1; /** USTLFIR[13] * WDF misc_reg parity error */ - (rUSTLFIR, bit(13)) ? defaultMaskedError; + (rUSTLFIR, bit(13)) ? self_th_1; /** USTLFIR[14] * WDF MCA async error */ - (rUSTLFIR, bit(14)) ? defaultMaskedError; + (rUSTLFIR, bit(14)) ? self_th_1; /** USTLFIR[15] * WDF Data Syndrome NE0 @@ -407,32 +422,32 @@ group gUSTLFIR /** USTLFIR[16] * WDF CMT parity error */ - (rUSTLFIR, bit(16)) ? defaultMaskedError; + (rUSTLFIR, bit(16)) ? self_th_1; /** USTLFIR[17] - * TBD + * spare */ (rUSTLFIR, bit(17)) ? defaultMaskedError; /** USTLFIR[18] - * TBD + * spare */ (rUSTLFIR, bit(18)) ? defaultMaskedError; /** USTLFIR[19] - * TBD + * Read Buffers overflowed/underflowed */ - (rUSTLFIR, bit(19)) ? defaultMaskedError; + (rUSTLFIR, bit(19)) ? all_ocmb_and_mcc_th_1; /** USTLFIR[20] * WRT Buffer CE */ - (rUSTLFIR, bit(20)) ? defaultMaskedError; + (rUSTLFIR, bit(20)) ? parent_proc_th_32perDay; /** USTLFIR[21] * WRT Buffer UE */ - (rUSTLFIR, bit(21)) ? defaultMaskedError; + (rUSTLFIR, bit(21)) ? parent_proc_th_1; /** USTLFIR[22] * WRT Buffer SUE @@ -442,12 +457,12 @@ group gUSTLFIR /** USTLFIR[23] * WRT scom sequencer error */ - (rUSTLFIR, bit(23)) ? defaultMaskedError; + (rUSTLFIR, bit(23)) ? self_th_1; /** USTLFIR[24] * WRT misc_reg parity error */ - (rUSTLFIR, bit(24)) ? defaultMaskedError; + (rUSTLFIR, bit(24)) ? self_th_1; /** USTLFIR[25:26] * WRT error information spares @@ -457,22 +472,22 @@ group gUSTLFIR /** USTLFIR[27] * Chan A fail response checkstop */ - (rUSTLFIR, bit(27)) ? defaultMaskedError; + (rUSTLFIR, bit(27)) ? chnl0_ocmb_th_1; /** USTLFIR[28] * Chan B fail response checkstop */ - (rUSTLFIR, bit(28)) ? defaultMaskedError; + (rUSTLFIR, bit(28)) ? chnl1_ocmb_th_1; /** USTLFIR[29] * Chan A fail response recoverable */ - (rUSTLFIR, bit(29)) ? defaultMaskedError; + (rUSTLFIR, bit(29)) ? 
threshold_and_mask_chnl0_ocmb_th_1; /** USTLFIR[30] * Chan B fail response recoverable */ - (rUSTLFIR, bit(30)) ? defaultMaskedError; + (rUSTLFIR, bit(30)) ? threshold_and_mask_chnl1_ocmb_th_1; /** USTLFIR[31] * Chan A lol drop checkstop @@ -487,72 +502,72 @@ group gUSTLFIR /** USTLFIR[33] * Chan A lol drop recoverable */ - (rUSTLFIR, bit(33)) ? defaultMaskedError; + (rUSTLFIR, bit(33)) ? chnl0_ocmb_H_omi_L_th_1; /** USTLFIR[34] * Chan B lol drop recoverable */ - (rUSTLFIR, bit(34)) ? defaultMaskedError; + (rUSTLFIR, bit(34)) ? chnl1_ocmb_H_omi_L_th_1; /** USTLFIR[35] * Chan A flit parity error */ - (rUSTLFIR, bit(35)) ? defaultMaskedError; + (rUSTLFIR, bit(35)) ? chnl0_omi_th_1; /** USTLFIR[36] * Chan B flit parity error */ - (rUSTLFIR, bit(36)) ? defaultMaskedError; + (rUSTLFIR, bit(36)) ? chnl1_omi_th_1; /** USTLFIR[37] * Chan A fatal parity error */ - (rUSTLFIR, bit(37)) ? defaultMaskedError; + (rUSTLFIR, bit(37)) ? chnl0_omi_th_1; /** USTLFIR[38] * Chan B fatal parity error */ - (rUSTLFIR, bit(38)) ? defaultMaskedError; + (rUSTLFIR, bit(38)) ? chnl1_omi_th_1; /** USTLFIR[39] * Chan A more than 2 data flits for template 9 */ - (rUSTLFIR, bit(39)) ? defaultMaskedError; + (rUSTLFIR, bit(39)) ? chnl0_ocmb_th_1; /** USTLFIR[40] * Chan B more than 2 data flits for template 9 */ - (rUSTLFIR, bit(40)) ? defaultMaskedError; + (rUSTLFIR, bit(40)) ? chnl1_ocmb_th_1; /** USTLFIR[41] * Chan A excess bad data bits */ - (rUSTLFIR, bit(41)) ? defaultMaskedError; + (rUSTLFIR, bit(41)) ? chnl0_ocmb_th_1; /** USTLFIR[42] * Chan B excess bad data bits */ - (rUSTLFIR, bit(42)) ? defaultMaskedError; + (rUSTLFIR, bit(42)) ? chnl1_ocmb_th_1; /** USTLFIR[43] * Chan A memory read data returned in template 0 */ - (rUSTLFIR, bit(43)) ? defaultMaskedError; + (rUSTLFIR, bit(43)) ? chnl0_ocmb_th_1; /** USTLFIR[44] * Chan B memory read data returned in template 0 */ - (rUSTLFIR, bit(44)) ? defaultMaskedError; + (rUSTLFIR, bit(44)) ? 
chnl1_ocmb_th_1; /** USTLFIR[45] * Chan A MMIO in lol mode */ - (rUSTLFIR, bit(45)) ? defaultMaskedError; + (rUSTLFIR, bit(45)) ? chnl0_omi_th_1; /** USTLFIR[46] * Chan B MMIO in lol mode */ - (rUSTLFIR, bit(46)) ? defaultMaskedError; + (rUSTLFIR, bit(46)) ? chnl1_omi_th_1; /** USTLFIR[47] * Chan A bad data @@ -567,62 +582,62 @@ group gUSTLFIR /** USTLFIR[49] * Chan A excess data error */ - (rUSTLFIR, bit(49)) ? defaultMaskedError; + (rUSTLFIR, bit(49)) ? chnl0_ocmb_th_1; /** USTLFIR[50] * Chan B excess data error */ - (rUSTLFIR, bit(50)) ? defaultMaskedError; + (rUSTLFIR, bit(50)) ? chnl1_ocmb_th_1; /** USTLFIR[51] * Chan A Bad CRC data not valid error */ - (rUSTLFIR, bit(51)) ? defaultMaskedError; + (rUSTLFIR, bit(51)) ? chnl0_omi_th_1; /** USTLFIR[52] * Chan B Bad CRC data not valid error */ - (rUSTLFIR, bit(52)) ? defaultMaskedError; + (rUSTLFIR, bit(52)) ? chnl1_omi_th_1; /** USTLFIR[53] * Chan A FIFO overflow error */ - (rUSTLFIR, bit(53)) ? defaultMaskedError; + (rUSTLFIR, bit(53)) ? chnl0_omi_th_1; /** USTLFIR[54] * Chan B FIFO overflow error */ - (rUSTLFIR, bit(54)) ? defaultMaskedError; + (rUSTLFIR, bit(54)) ? chnl1_omi_th_1; /** USTLFIR[55] * Chan A invalid cmd error */ - (rUSTLFIR, bit(55)) ? defaultMaskedError; + (rUSTLFIR, bit(55)) ? chnl0_ocmb_th_1; /** USTLFIR[56] * Chan B invalid cmd error */ - (rUSTLFIR, bit(56)) ? defaultMaskedError; + (rUSTLFIR, bit(56)) ? chnl1_ocmb_th_1; /** USTLFIR[57] * Fatal reg parity error */ - (rUSTLFIR, bit(57)) ? defaultMaskedError; + (rUSTLFIR, bit(57)) ? self_th_1; /** USTLFIR[58] * Recoverable reg parity error */ - (rUSTLFIR, bit(58)) ? defaultMaskedError; + (rUSTLFIR, bit(58)) ? self_th_1; /** USTLFIR[59] * Chan A invalid DL DP combo */ - (rUSTLFIR, bit(59)) ? defaultMaskedError; + (rUSTLFIR, bit(59)) ? chnl0_ocmb_th_1; /** USTLFIR[60] * Chan B invalid DL DP combo */ - (rUSTLFIR, bit(60)) ? defaultMaskedError; + (rUSTLFIR, bit(60)) ? 
chnl1_ocmb_th_1; /** USTLFIR[61] * spare diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule b/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule index 38edbaaea..e34035165 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mcc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -24,9 +24,163 @@ # IBM_PROLOG_END_TAG ################################################################################ +# Callouts +################################################################################ + +actionclass chnl0_omi +{ + callout(connected(TYPE_OMI,0), MRU_MED); +}; + +actionclass chnl1_omi +{ + callout(connected(TYPE_OMI,1), MRU_MED); +}; + +actionclass chnl0_omi_L +{ + callout(connected(TYPE_OMI,0), MRU_LOW); +}; + +actionclass chnl1_omi_L +{ + callout(connected(TYPE_OMI,1), MRU_LOW); +}; + +actionclass chnl0_ocmb +{ + callout(connected(TYPE_OCMB_CHIP,0), MRU_MED); +}; + +actionclass chnl1_ocmb +{ + callout(connected(TYPE_OCMB_CHIP,1), MRU_MED); +}; + +actionclass chnl0_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_0"); +}; + +actionclass chnl1_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_1"); +}; + +actionclass chnl0_omi_bus_th_1 +{ + chnl0_omi_bus; + threshold1; +}; + +actionclass chnl1_omi_bus_th_1 +{ + chnl1_omi_bus; + threshold1; +}; + +actionclass chnl0_omi_bus_th_32_perDay +{ + chnl0_omi_bus; + threshold32pday; +}; + +actionclass chnl1_omi_bus_th_32_perDay +{ + chnl1_omi_bus; + threshold32pday; +}; + +actionclass chnl0_omi_th_1 +{ + chnl0_omi; + threshold1; +}; + +actionclass chnl1_omi_th_1 +{ + chnl1_omi; + threshold1; +}; + +actionclass chnl0_ocmb_th_1 +{ + chnl0_ocmb; + threshold1; +}; + +actionclass chnl1_ocmb_th_1 +{ + chnl1_ocmb; + threshold1; +}; + +actionclass 
all_ocmb_and_mcc_th_1 +{ + chnl0_ocmb; + chnl1_ocmb; + calloutSelfMed; + threshold1; +}; + +actionclass chnl0_ocmb_H_omi_L_th_1 +{ + chnl0_ocmb; + chnl0_omi_L; + threshold1; +}; + +actionclass chnl1_ocmb_H_omi_L_th_1 +{ + chnl1_ocmb; + chnl1_omi_L; + threshold1; +}; + +actionclass threshold_and_mask_chnl0_ocmb_th_1 +{ + threshold_and_mask; + chnl0_ocmb; + threshold1; +}; + +actionclass threshold_and_mask_chnl1_ocmb_th_1 +{ + threshold_and_mask; + chnl1_ocmb; + threshold1; +}; + +################################################################################ # Analyze groups ################################################################################ actionclass analyzeDSTLFIR { analyze(gDSTLFIR); }; actionclass analyzeUSTLFIR { analyze(gUSTLFIR); }; +################################################################################ +# Analyze connected +################################################################################ + +actionclass analyze_ocmb_chnl0 +{ + try( funccall("checkOcmb_0"), analyze(connected(TYPE_OCMB_CHIP, 0)) ); +}; + +actionclass analyze_ocmb_chnl1 +{ + try( funccall("checkOcmb_1"), analyze(connected(TYPE_OCMB_CHIP, 1)) ); +}; + +actionclass analyze_ocmb_chnl0_UERE +{ + SueSource; + analyze_ocmb_chnl0; +}; + +actionclass analyze_ocmb_chnl1_UERE +{ + SueSource; + analyze_ocmb_chnl1; +}; + diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule new file mode 100644 index 000000000..001a54e5c --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule @@ -0,0 +1,80 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/axone/axone_mcc_regs.rule $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. 
+# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# IBM_PROLOG_END_TAG + + +############################################################################### +# Additional registers for mcc, not defined in XML +############################################################################### + + ########################################################################### + # P9 Axone target Channel Fail Config registers + ########################################################################### + + register DSTLCFG2 + { + name "P9 Axone DSTL Error Injection Register"; + scomaddr 0x0701090E; + capture group default; + }; + + register USTLFAILMASK + { + name "P9 Axone USTL Fail Response Channel Fail Mask"; + scomaddr 0x07010A13; + capture group default; + }; + + ########################################################################### + # P9 Axone target DSTLFIR + ########################################################################### + + register DSTLFIR_AND + { + name "P9 MCC target DSTLFIR atomic AND"; + scomaddr 0x07010901; + capture group never; + access write_only; + }; + + register DSTLFIR_MASK_OR + { + name "P9 MCC target DSTLFIR MASK atomic OR"; + scomaddr 0x07010905; + capture group never; + access write_only; + }; + + ########################################################################### + # P9 Axone target USTLFIR + ########################################################################### + + register USTLFIR_MASK_OR + { + name "P9 MCC target 
USTLFIR MASK atomic OR"; + scomaddr 0x07010A05; + capture group never; + access write_only; + }; + diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mi.rule b/src/usr/diag/prdf/common/plat/axone/axone_mi.rule index 078163819..56366a7f5 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mi.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mi.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -148,27 +148,27 @@ group gMCFIR /** MCFIR[0] * MC internal recoverable error */ - (rMCFIR, bit(0)) ? defaultMaskedError; + (rMCFIR, bit(0)) ? self_th_1; /** MCFIR[1] * MC internal non recoverable error */ - (rMCFIR, bit(1)) ? defaultMaskedError; + (rMCFIR, bit(1)) ? parent_proc_th_1; /** MCFIR[2] * Powerbus protocol error */ - (rMCFIR, bit(2)) ? defaultMaskedError; + (rMCFIR, bit(2)) ? level2_th_1; /** MCFIR[3] * Inband bar hit with incorrect ttype */ - (rMCFIR, bit(3)) ? defaultMaskedError; + (rMCFIR, bit(3)) ? level2_M_self_L_th_1; /** MCFIR[4] * Multiple bar */ - (rMCFIR, bit(4)) ? defaultMaskedError; + (rMCFIR, bit(4)) ? self_th_1; /** MCFIR[5] * PB write ECC syndrome NE0 @@ -183,7 +183,7 @@ group gMCFIR /** MCFIR[8] * Command list timeout */ - (rMCFIR, bit(8)) ? defaultMaskedError; + (rMCFIR, bit(8)) ? threshold_and_mask_level2; /** MCFIR[9:10] * reserved diff --git a/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule index 0e47e05a5..d9441d719 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_mi_regs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -48,34 +48,66 @@ capture group default; }; - register MCFGP + register MCFGP0 { - name "MCFGP"; - scomaddr 0x501080A; + name "MCFGP0"; + scomaddr 0x0501080A; capture group default; capture group MirrorConfig; }; - register MCFGPA + register MCFGP1 { - name "MCFGPA"; + name "MCFGP1"; scomaddr 0x0501080B; capture group default; capture group MirrorConfig; }; - register MCFGPM + register MCFGP0A { - name "MCFGPM"; - scomaddr 0x501080C; + name "MCFGP0A"; + scomaddr 0x0501080E; capture group default; capture group MirrorConfig; }; - register MCFGPMA + register MCFGP1A { - name "MCFGPMA"; - scomaddr 0x0501080D; + name "MCFGP1A"; + scomaddr 0x0501080F; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM0 + { + name "MCFGPM0"; + scomaddr 0x5010820; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM0A + { + name "MCFGPM0A"; + scomaddr 0x05010821; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM1 + { + name "MCFGPM1"; + scomaddr 0x5010830; + capture group default; + capture group MirrorConfig; + }; + + register MCFGPM1A + { + name "MCFGPM1A"; + scomaddr 0x05010831; capture group default; capture group MirrorConfig; }; diff --git a/src/usr/diag/prdf/common/plat/axone/axone_npu.rule b/src/usr/diag/prdf/common/plat/axone/axone_npu.rule index ede5ef5cc..49c71d74a 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_npu.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_npu.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -214,7 +214,7 @@ rule rNPU0FIR group gNPU0FIR filter singlebit, - cs_root_cause(1,2,3,4,5,6,7,9,10,16,18,29,31,42,44) + cs_root_cause(1,2,3,4,5,6,7,9,10,16,18,19,25,29,31,40,42,44,45) { /** NPU0FIR[0] * NTL array CE @@ -354,7 +354,7 @@ group gNPU0FIR /** NPU0FIR[27] * Invalid access to secure memory attempted */ - (rNPU0FIR, bit(27)) ? defaultMaskedError; + (rNPU0FIR, bit(27)) ? self_th_1; /** NPU0FIR[28] * spare @@ -489,12 +489,12 @@ rule rNPU1FIR group gNPU1FIR filter singlebit, - cs_root_cause + cs_root_cause(0,2,4,6,8,10,13,14,15,20,22,25,27,29,31,32,33,34,35,37,39,40,41,42,47,49,51,53,55,57) { /** NPU1FIR[0] * NDL Brick0 stall */ - (rNPU1FIR, bit(0)) ? defaultMaskedError; + (rNPU1FIR, bit(0)) ? self_th_1; /** NPU1FIR[1] * NDL Brick0 nostall @@ -504,7 +504,7 @@ group gNPU1FIR /** NPU1FIR[2] * NDL Brick1 stall */ - (rNPU1FIR, bit(2)) ? defaultMaskedError; + (rNPU1FIR, bit(2)) ? self_th_1; /** NPU1FIR[3] * NDL Brick1 nostall @@ -514,7 +514,7 @@ group gNPU1FIR /** NPU1FIR[4] * NDL Brick2 stall */ - (rNPU1FIR, bit(4)) ? defaultMaskedError; + (rNPU1FIR, bit(4)) ? self_th_1; /** NPU1FIR[5] * NDL Brick2 nostall @@ -524,7 +524,7 @@ group gNPU1FIR /** NPU1FIR[6] * NDL Brick3 stall */ - (rNPU1FIR, bit(6)) ? defaultMaskedError; + (rNPU1FIR, bit(6)) ? self_th_1; /** NPU1FIR[7] * NDL Brick3 nostall @@ -534,7 +534,7 @@ group gNPU1FIR /** NPU1FIR[8] * NDL Brick4 stall */ - (rNPU1FIR, bit(8)) ? defaultMaskedError; + (rNPU1FIR, bit(8)) ? self_th_1; /** NPU1FIR[9] * NDL Brick4 nostall @@ -544,7 +544,7 @@ group gNPU1FIR /** NPU1FIR[10] * NDL Brick5 stall */ - (rNPU1FIR, bit(10)) ? defaultMaskedError; + (rNPU1FIR, bit(10)) ? self_th_1; /** NPU1FIR[11] * NDL Brick5 nostall @@ -554,22 +554,22 @@ group gNPU1FIR /** NPU1FIR[12] * MISC Register ring error (ie noack) */ - (rNPU1FIR, bit(12)) ? defaultMaskedError; + (rNPU1FIR, bit(12)) ? self_th_32perDay; /** NPU1FIR[13] - * MISC Parity error from ibr addr regi + * MISC Parity error on MISC Cntrl reg */ - (rNPU1FIR, bit(13)) ? 
defaultMaskedError; + (rNPU1FIR, bit(13)) ? self_th_1; /** NPU1FIR[14] * MISC Parity error on SCOM D/A addr reg */ - (rNPU1FIR, bit(14)) ? defaultMaskedError; + (rNPU1FIR, bit(14)) ? self_th_1; /** NPU1FIR[15] * MISC Parity error on MISC Cntrl reg */ - (rNPU1FIR, bit(15)) ? defaultMaskedError; + (rNPU1FIR, bit(15)) ? self_th_1; /** NPU1FIR[16] * Reserved @@ -594,7 +594,7 @@ group gNPU1FIR /** NPU1FIR[20] * ATS Effective Address hit multiple TCE */ - (rNPU1FIR, bit(20)) ? defaultMaskedError; + (rNPU1FIR, bit(20)) ? self_th_1; /** NPU1FIR[21] * ATS TCE Page access error @@ -604,72 +604,72 @@ group gNPU1FIR /** NPU1FIR[22] * ATS Timeout on TCE tree walk */ - (rNPU1FIR, bit(22)) ? defaultMaskedError; + (rNPU1FIR, bit(22)) ? self_th_1; /** NPU1FIR[23] * ATS Parity error on TCE cache dir array */ - (rNPU1FIR, bit(23)) ? defaultMaskedError; + (rNPU1FIR, bit(23)) ? self_th_32perDay; /** NPU1FIR[24] * ATS Parity error on TCE cache data array */ - (rNPU1FIR, bit(24)) ? defaultMaskedError; + (rNPU1FIR, bit(24)) ? self_th_32perDay; /** NPU1FIR[25] * ATS ECC UE on Effective Address array */ - (rNPU1FIR, bit(25)) ? defaultMaskedError; + (rNPU1FIR, bit(25)) ? self_th_1; /** NPU1FIR[26] * ATS ECC CE on Effective Address array */ - (rNPU1FIR, bit(26)) ? defaultMaskedError; + (rNPU1FIR, bit(26)) ? self_th_32perDay; /** NPU1FIR[27] * ATS ECC UE on TDRmem array */ - (rNPU1FIR, bit(27)) ? defaultMaskedError; + (rNPU1FIR, bit(27)) ? self_th_1; /** NPU1FIR[28] * ATS ECC CE on TDRmem array */ - (rNPU1FIR, bit(28)) ? defaultMaskedError; + (rNPU1FIR, bit(28)) ? self_th_32perDay; /** NPU1FIR[29] * ATS ECC UE on CQ CTL DMA Read */ - (rNPU1FIR, bit(29)) ? defaultMaskedError; + (rNPU1FIR, bit(29)) ? self_th_1; /** NPU1FIR[30] * ATS ECC CE on CQ CTL DMA Read */ - (rNPU1FIR, bit(30)) ? defaultMaskedError; + (rNPU1FIR, bit(30)) ? self_th_32perDay; /** NPU1FIR[31] * ATS Parity error on TVT entry */ - (rNPU1FIR, bit(31)) ? defaultMaskedError; + (rNPU1FIR, bit(31)) ? 
self_th_1; /** NPU1FIR[32] * ATS Parity err on IODA Address Reg */ - (rNPU1FIR, bit(32)) ? defaultMaskedError; + (rNPU1FIR, bit(32)) ? self_th_1; /** NPU1FIR[33] * ATS Parity error on ATS Control Register */ - (rNPU1FIR, bit(33)) ? defaultMaskedError; + (rNPU1FIR, bit(33)) ? self_th_1; /** NPU1FIR[34] - * ATS Parity error on ATS Timeout Control Register + * ATS Parity error on ATS reg */ - (rNPU1FIR, bit(34)) ? defaultMaskedError; + (rNPU1FIR, bit(34)) ? self_th_1; /** NPU1FIR[35] * ATS Invalid IODA Table Select entry */ - (rNPU1FIR, bit(35)) ? defaultMaskedError; + (rNPU1FIR, bit(35)) ? self_th_1; /** NPU1FIR[36] * Reserved @@ -679,7 +679,7 @@ group gNPU1FIR /** NPU1FIR[37] * Kill xlate epoch timeout */ - (rNPU1FIR, bit(37)) ? defaultMaskedError; + (rNPU1FIR, bit(37)) ? self_th_1; /** NPU1FIR[38] * PEE secure SMF not secure @@ -689,17 +689,32 @@ group gNPU1FIR /** NPU1FIR[39] * XSL in suspend mode when OTL sends cmd */ - (rNPU1FIR, bit(39)) ? defaultMaskedError; + (rNPU1FIR, bit(39)) ? self_th_1; + + /** NPU1FIR[40] + * Unsupported page size + */ + (rNPU1FIR, bit(40)) ? self_th_1; + + /** NPU1FIR[41] + * Unexpected XLATE release + */ + (rNPU1FIR, bit(41)) ? self_th_1; + + /** NPU1FIR[42] + * Kill XLATE done fail + */ + (rNPU1FIR, bit(42)) ? self_th_1; - /** NPU1FIR[40:46] + /** NPU1FIR[43:46] * Reserved */ - (rNPU1FIR, bit(40|41|42|43|44|45|46)) ? defaultMaskedError; + (rNPU1FIR, bit(43|44|45|46)) ? defaultMaskedError; /** NPU1FIR[47] * NDL Brick6 stall */ - (rNPU1FIR, bit(47)) ? defaultMaskedError; + (rNPU1FIR, bit(47)) ? self_th_1; /** NPU1FIR[48] * NDL Brick6 nostall @@ -709,7 +724,7 @@ group gNPU1FIR /** NPU1FIR[49] * NDL Brick7 stall */ - (rNPU1FIR, bit(49)) ? defaultMaskedError; + (rNPU1FIR, bit(49)) ? self_th_1; /** NPU1FIR[50] * NDL Brick7 nostall @@ -719,7 +734,7 @@ group gNPU1FIR /** NPU1FIR[51] * NDL Brick8 stall */ - (rNPU1FIR, bit(51)) ? defaultMaskedError; + (rNPU1FIR, bit(51)) ? 
self_th_1; /** NPU1FIR[52] * NDL Brick8 nostall @@ -729,7 +744,7 @@ group gNPU1FIR /** NPU1FIR[53] * NDL Brick9 stall */ - (rNPU1FIR, bit(53)) ? defaultMaskedError; + (rNPU1FIR, bit(53)) ? self_th_1; /** NPU1FIR[54] * NDL Brick9 nostall @@ -739,7 +754,7 @@ group gNPU1FIR /** NPU1FIR[55] * NDL Brick10 stall */ - (rNPU1FIR, bit(55)) ? defaultMaskedError; + (rNPU1FIR, bit(55)) ? self_th_1; /** NPU1FIR[56] * NDL Brick10 nostall @@ -749,7 +764,7 @@ group gNPU1FIR /** NPU1FIR[57] * NDL Brick11 stall */ - (rNPU1FIR, bit(57)) ? defaultMaskedError; + (rNPU1FIR, bit(57)) ? self_th_1; /** NPU1FIR[58] * NDL Brick11 nostall @@ -762,22 +777,22 @@ group gNPU1FIR (rNPU1FIR, bit(59)) ? defaultMaskedError; /** NPU1FIR[60] - * MISC SCOM ring 0 sat 0 signaled internal FSM err + * Misc SCOM ring 0 sat 0 signalled internal FSM error */ (rNPU1FIR, bit(60)) ? defaultMaskedError; /** NPU1FIR[61] - * MISC SCOM ring 0 sat 1 signaled internal FSM err + * Misc SCOM ring 0 sat 1 signalled internal FSM error */ (rNPU1FIR, bit(61)) ? defaultMaskedError; /** NPU1FIR[62] - * Scom Error + * scom error */ (rNPU1FIR, bit(62)) ? defaultMaskedError; /** NPU1FIR[63] - * Scom Error + * scom error */ (rNPU1FIR, bit(63)) ? defaultMaskedError; @@ -799,7 +814,7 @@ rule rNPU2FIR group gNPU2FIR filter singlebit, - cs_root_cause + cs_root_cause(4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,28,29,30,31,36,37,38,39,40,41,42,43,45,47,48,50,51,52) { /** NPU2FIR[0] * OTL Brick2 translation fault @@ -824,145 +839,145 @@ group gNPU2FIR /** NPU2FIR[4] * OTL TL credit ctr overflow */ - (rNPU2FIR, bit(4)) ? defaultMaskedError; + (rNPU2FIR, bit(4)) ? self_th_1; /** NPU2FIR[5] * OTL RX acTag invalid */ - (rNPU2FIR, bit(5)) ? defaultMaskedError; + (rNPU2FIR, bit(5)) ? self_th_1; /** NPU2FIR[6] * OTL RX acTag points to an invalid entry. */ - (rNPU2FIR, bit(6)) ? defaultMaskedError; + (rNPU2FIR, bit(6)) ? self_th_1; /** NPU2FIR[7] * OTL RX reserved opcode used. */ - (rNPU2FIR, bit(7)) ? 
defaultMaskedError; + (rNPU2FIR, bit(7)) ? self_th_1; /** NPU2FIR[8] * OTL RX rtn_tl_credit cmd outside slot0. */ - (rNPU2FIR, bit(8)) ? defaultMaskedError; + (rNPU2FIR, bit(8)) ? self_th_1; /** NPU2FIR[9] * OTL RX bad opcode and template combo */ - (rNPU2FIR, bit(9)) ? defaultMaskedError; + (rNPU2FIR, bit(9)) ? self_th_1; /** NPU2FIR[10] * OTL RX unsupported template format. */ - (rNPU2FIR, bit(10)) ? defaultMaskedError; + (rNPU2FIR, bit(10)) ? self_th_1; /** NPU2FIR[11] * OTL RX bad template x00 format. */ - (rNPU2FIR, bit(11)) ? defaultMaskedError; + (rNPU2FIR, bit(11)) ? self_th_1; /** NPU2FIR[12] * OTL RX control flit overrun. */ - (rNPU2FIR, bit(12)) ? defaultMaskedError; + (rNPU2FIR, bit(12)) ? self_th_1; /** NPU2FIR[13] * OTL RX unexpected data flit. */ - (rNPU2FIR, bit(13)) ? defaultMaskedError; + (rNPU2FIR, bit(13)) ? self_th_1; /** NPU2FIR[14] * OTL RX DL link down. */ - (rNPU2FIR, bit(14)) ? defaultMaskedError; + (rNPU2FIR, bit(14)) ? self_th_1; /** NPU2FIR[15] * OTL RX bad data received on command. */ - (rNPU2FIR, bit(15)) ? defaultMaskedError; + (rNPU2FIR, bit(15)) ? self_th_1; /** NPU2FIR[16] * OTL RX bad data received on response. */ - (rNPU2FIR, bit(16)) ? defaultMaskedError; + (rNPU2FIR, bit(16)) ? self_th_1; /** NPU2FIR[17] * OTL RX AP response not allowed */ - (rNPU2FIR, bit(17)) ? defaultMaskedError; + (rNPU2FIR, bit(17)) ? self_th_1; /** NPU2FIR[18] * OR of all OTL parity errors. */ - (rNPU2FIR, bit(18)) ? defaultMaskedError; + (rNPU2FIR, bit(18)) ? self_th_1; /** NPU2FIR[19] * OR of all OTL ECC CE errors. */ - (rNPU2FIR, bit(19)) ? defaultMaskedError; + (rNPU2FIR, bit(19)) ? self_th_32perDay; /** NPU2FIR[20] * OR of all OTL ECC UE errors. */ - (rNPU2FIR, bit(20)) ? defaultMaskedError; + (rNPU2FIR, bit(20)) ? self_th_1; /** NPU2FIR[21] * RXO OP Errors. */ - (rNPU2FIR, bit(21)) ? defaultMaskedError; + (rNPU2FIR, bit(21)) ? self_th_1; /** NPU2FIR[22] * RXO Internal Errors. */ - (rNPU2FIR, bit(22)) ? defaultMaskedError; + (rNPU2FIR, bit(22)) ? 
self_th_1; /** NPU2FIR[23] * OTL RXI fifo overrun. */ - (rNPU2FIR, bit(23)) ? defaultMaskedError; + (rNPU2FIR, bit(23)) ? self_th_1; /** NPU2FIR[24] * OTL RXI ctrl flit data run len invalid. */ - (rNPU2FIR, bit(24)) ? defaultMaskedError; + (rNPU2FIR, bit(24)) ? self_th_1; /** NPU2FIR[25] * OTL RXI opcode specifies dL=0b00. */ - (rNPU2FIR, bit(25)) ? defaultMaskedError; + (rNPU2FIR, bit(25)) ? self_th_1; /** NPU2FIR[26] * OTL RXI bad data received vc2 */ - (rNPU2FIR, bit(26)) ? defaultMaskedError; + (rNPU2FIR, bit(26)) ? self_th_1; /** NPU2FIR[27] * OTL RXI dcp2 fifo overrun */ - (rNPU2FIR, bit(27)) ? defaultMaskedError; + (rNPU2FIR, bit(27)) ? self_th_1; /** NPU2FIR[28] * OTL RXI vc1 fifo overrun */ - (rNPU2FIR, bit(28)) ? defaultMaskedError; + (rNPU2FIR, bit(28)) ? self_th_1; /** NPU2FIR[29] * OTL RXI vc2 fifo overrun */ - (rNPU2FIR, bit(29)) ? defaultMaskedError; + (rNPU2FIR, bit(29)) ? self_th_1; /** NPU2FIR[30] - * Reserved + * OTL RXI Data link not supported */ - (rNPU2FIR, bit(30)) ? defaultMaskedError; + (rNPU2FIR, bit(30)) ? self_th_1; /** NPU2FIR[31] * OTL TXI opcode error */ - (rNPU2FIR, bit(31)) ? defaultMaskedError; + (rNPU2FIR, bit(31)) ? self_th_1; /** NPU2FIR[32] - * Malformed packet error type 4 + * OTL RXI reserved field not equal to 0 */ (rNPU2FIR, bit(32)) ? defaultMaskedError; @@ -974,42 +989,42 @@ group gNPU2FIR /** NPU2FIR[36] * MMIO invalidate while one in progress. */ - (rNPU2FIR, bit(36)) ? defaultMaskedError; + (rNPU2FIR, bit(36)) ? self_th_1; /** NPU2FIR[37] * Unexpected ITAG on itag completion pt 0 */ - (rNPU2FIR, bit(37)) ? defaultMaskedError; + (rNPU2FIR, bit(37)) ? self_th_1; /** NPU2FIR[38] * Unexpected ITAG on itag completion pt 1 */ - (rNPU2FIR, bit(38)) ? defaultMaskedError; + (rNPU2FIR, bit(38)) ? self_th_1; /** NPU2FIR[39] * Unexpected Read PEE completion. */ - (rNPU2FIR, bit(39)) ? defaultMaskedError; + (rNPU2FIR, bit(39)) ? self_th_1; /** NPU2FIR[40] * Unexpected Checkout response. */ - (rNPU2FIR, bit(40)) ? 
defaultMaskedError; + (rNPU2FIR, bit(40)) ? self_th_1; /** NPU2FIR[41] * Translation request but SPAP is invalid. */ - (rNPU2FIR, bit(41)) ? defaultMaskedError; + (rNPU2FIR, bit(41)) ? self_th_1; /** NPU2FIR[42] * Read a PEE which was not valid. */ - (rNPU2FIR, bit(42)) ? defaultMaskedError; + (rNPU2FIR, bit(42)) ? self_th_1; /** NPU2FIR[43] * Bloom filter protection error. */ - (rNPU2FIR, bit(43)) ? defaultMaskedError; + (rNPU2FIR, bit(43)) ? self_th_1; /** NPU2FIR[44] * Translation request to non-valid TA @@ -1017,44 +1032,44 @@ group gNPU2FIR (rNPU2FIR, bit(44)) ? defaultMaskedError; /** NPU2FIR[45] - * TA Translation request to an invalid TA + * TA translation request to an invalid TA */ - (rNPU2FIR, bit(45)) ? defaultMaskedError; + (rNPU2FIR, bit(45)) ? self_th_1; /** NPU2FIR[46] * correctable array error (SBE). */ - (rNPU2FIR, bit(46)) ? defaultMaskedError; + (rNPU2FIR, bit(46)) ? self_th_32perDay; /** NPU2FIR[47] * array error (UE or parity). */ - (rNPU2FIR, bit(47)) ? defaultMaskedError; + (rNPU2FIR, bit(47)) ? self_th_1; /** NPU2FIR[48] * S/TLBI buffer overflow. */ - (rNPU2FIR, bit(48)) ? defaultMaskedError; + (rNPU2FIR, bit(48)) ? self_th_1; /** NPU2FIR[49] * SBE CE on Pb cout rsp or PEE read data. */ - (rNPU2FIR, bit(49)) ? defaultMaskedError; + (rNPU2FIR, bit(49)) ? self_th_32perDay; /** NPU2FIR[50] * UE on Pb cut rsp or PEE read data. */ - (rNPU2FIR, bit(50)) ? defaultMaskedError; + (rNPU2FIR, bit(50)) ? self_th_1; /** NPU2FIR[51] * SUE on Pb chkout rsp or Pb PEE rd data. */ - (rNPU2FIR, bit(51)) ? defaultMaskedError; + (rNPU2FIR, bit(51)) ? self_th_1; /** NPU2FIR[52] - * PA mem_hit when bar mode is nonzero + * PA mem hit when bar mode is nonzero */ - (rNPU2FIR, bit(52)) ? defaultMaskedError; + (rNPU2FIR, bit(52)) ? self_th_1; /** NPU2FIR[53] * XSL Reserved, macro bit 17. 
diff --git a/src/usr/diag/prdf/common/plat/axone/axone_obus.rule b/src/usr/diag/prdf/common/plat/axone/axone_obus.rule index a079fac59..1a346c417 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_obus.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_obus.rule @@ -469,12 +469,12 @@ group gIOOLFIR /** IOOLFIR[8] * link0 nak received */ - (rIOOLFIR, bit(8)) ? defaultMaskedError; + (rIOOLFIR, bit(8)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[9] * link1 nak received */ - (rIOOLFIR, bit(9)) ? defaultMaskedError; + (rIOOLFIR, bit(9)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[10] * link0 replay buffer full @@ -499,22 +499,22 @@ group gIOOLFIR /** IOOLFIR[14] * link0 sl ecc correctable */ - (rIOOLFIR, bit(14)) ? threshold_and_mask_self; + (rIOOLFIR, bit(14)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[15] * link1 sl ecc correctable */ - (rIOOLFIR, bit(15)) ? threshold_and_mask_self; + (rIOOLFIR, bit(15)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[16] * link0 sl ecc ue */ - (rIOOLFIR, bit(16)) ? threshold_and_mask_self; + (rIOOLFIR, bit(16)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[17] * link1 sl ecc ue */ - (rIOOLFIR, bit(17)) ? threshold_and_mask_self; + (rIOOLFIR, bit(17)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[18] * link0 retrain threshold @@ -597,12 +597,12 @@ group gIOOLFIR (rIOOLFIR, bit(33)) ? defaultMaskedError; /** IOOLFIR[34] - * link0 num replay + * link0 num replay or no forward progress */ (rIOOLFIR, bit(34)) ? defaultMaskedError; /** IOOLFIR[35] - * link1 num replay + * link1 num replay or no forward progress */ (rIOOLFIR, bit(35)) ? defaultMaskedError; @@ -619,12 +619,12 @@ group gIOOLFIR /** IOOLFIR[38] * link0 prbs select error */ - (rIOOLFIR, bit(38)) ? threshold_and_mask_self; + (rIOOLFIR, bit(38)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[39] * link1 prbs select error */ - (rIOOLFIR, bit(39)) ? threshold_and_mask_self; + (rIOOLFIR, bit(39)) ? 
threshold_and_mask_self_smp_only; /** IOOLFIR[40] * link0 tcomplete bad @@ -639,102 +639,102 @@ group gIOOLFIR /** IOOLFIR[42] * link0 no spare lane available */ - (rIOOLFIR, bit(42)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(42)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[43] * link1 no spare lane available */ - (rIOOLFIR, bit(43)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(43)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[44] - * link0 spare done + * link0 spare done or degraded mode */ - (rIOOLFIR, bit(44)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(44)) ? spare_lane_degraded_mode_L0; /** IOOLFIR[45] - * link1 spare done + * link1 spare done or degraded mode */ - (rIOOLFIR, bit(45)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(45)) ? spare_lane_degraded_mode_L1; /** IOOLFIR[46] * link0 too many crc errors */ - (rIOOLFIR, bit(46)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(46)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[47] * link1 too many crc errors */ - (rIOOLFIR, bit(47)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(47)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[48] - * link0 npu error + * link0 npu error or orx otx dlx errors */ (rIOOLFIR, bit(48)) ? threshold_and_mask_self; /** IOOLFIR[49] - * link1 npu error + * link1 npu error or orx otx dlx errors */ (rIOOLFIR, bit(49)) ? threshold_and_mask_self; /** IOOLFIR[50] * linkx npu error */ - (rIOOLFIR, bit(50)) ? threshold_and_mask_self; + (rIOOLFIR, bit(50)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[51] * osc switch */ - (rIOOLFIR, bit(51)) ? threshold_and_mask_self; + (rIOOLFIR, bit(51)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[52] * link0 correctable array error */ - (rIOOLFIR, bit(52)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(52)) ? self_th_32perDay; /** IOOLFIR[53] * link1 correctable array error */ - (rIOOLFIR, bit(53)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(53)) ? self_th_32perDay; /** IOOLFIR[54] * link0 uncorrectable array error */ - (rIOOLFIR, bit(54)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(54)) ? 
self_th_1; /** IOOLFIR[55] * link1 uncorrectable array error */ - (rIOOLFIR, bit(55)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(55)) ? self_th_1; /** IOOLFIR[56] * link0 training failed */ - (rIOOLFIR, bit(56)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(56)) ? training_failure_L0; /** IOOLFIR[57] * link1 training failed */ - (rIOOLFIR, bit(57)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(57)) ? training_failure_L1; /** IOOLFIR[58] * link0 unrecoverable error */ - (rIOOLFIR, bit(58)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(58)) ? unrecoverable_error_L0; /** IOOLFIR[59] * link1 unrecoverable error */ - (rIOOLFIR, bit(59)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(59)) ? unrecoverable_error_L1; /** IOOLFIR[60] * link0 internal error */ - (rIOOLFIR, bit(60)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(60)) ? internal_error_L0; /** IOOLFIR[61] * link1 internal error */ - (rIOOLFIR, bit(61)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(61)) ? internal_error_L1; /** IOOLFIR[62] * fir scom err dup diff --git a/src/usr/diag/prdf/common/plat/axone/axone_omic.rule b/src/usr/diag/prdf/common/plat/axone/axone_omic.rule index 09ed59f2d..7b26f7a3a 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_omic.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_omic.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2020 # [+] International Business Machines Corp. # # @@ -196,8 +196,10 @@ rule rOMIC }; group gOMIC attntype CHECK_STOP, RECOVERABLE, UNIT_CS, HOST_ATTN - filter singlebit + filter priority(2,0,1) { + # We need to prioritize analysis to the OMIDLFIR here because of potential + # Channel Fail attentions in that FIR that will be reported as RECOVERABLE. (rOMIC, bit(0)) ? analyzeIOOMIFIR; (rOMIC, bit(1)) ? analyzeMCPPEFIR; (rOMIC, bit(2)) ? analyzeOMIDLFIR; @@ -226,17 +228,17 @@ group gIOOMIFIR /** IOOMIFIR[0] * RX invalid state or parity error */ - (rIOOMIFIR, bit(0)) ? defaultMaskedError; + (rIOOMIFIR, bit(0)) ? 
self_th_1; /** IOOMIFIR[1] * TX invalid state or parity error */ - (rIOOMIFIR, bit(1)) ? defaultMaskedError; + (rIOOMIFIR, bit(1)) ? self_th_1; /** IOOMIFIR[2] * GCR hang error */ - (rIOOMIFIR, bit(2)) ? defaultMaskedError; + (rIOOMIFIR, bit(2)) ? self_th_1; /** IOOMIFIR[3:47] * Unused @@ -359,306 +361,306 @@ rule rOMIDLFIR }; group gOMIDLFIR - filter singlebit, - cs_root_cause + filter priority(0,20,40), + cs_root_cause(0,20,40) { /** OMIDLFIR[0] - * DL0 fatal error + * OMI-DL0 fatal error */ - (rOMIDLFIR, bit(0)) ? defaultMaskedError; + (rOMIDLFIR, bit(0)) ? dl0_fatal_error; /** OMIDLFIR[1] - * DL0 data UE + * OMI-DL0 UE on data flit */ - (rOMIDLFIR, bit(1)) ? defaultMaskedError; + (rOMIDLFIR, bit(1)) ? dl0_omi_th_1; /** OMIDLFIR[2] - * DL0 flit CE + * OMI-DL0 CE on TL flit */ - (rOMIDLFIR, bit(2)) ? defaultMaskedError; + (rOMIDLFIR, bit(2)) ? dl0_omi_th_32perDay; /** OMIDLFIR[3] - * DL0 CRC error + * OMI-DL0 detected a CRC error */ (rOMIDLFIR, bit(3)) ? defaultMaskedError; /** OMIDLFIR[4] - * DL0 nack + * OMI-DL0 received a nack */ (rOMIDLFIR, bit(4)) ? defaultMaskedError; /** OMIDLFIR[5] - * DL0 X4 mode + * OMI-DL0 running in degraded mode */ - (rOMIDLFIR, bit(5)) ? defaultMaskedError; + (rOMIDLFIR, bit(5)) ? dl0_omi_bus_th_1; /** OMIDLFIR[6] - * DL0 EDPL + * OMI-DL0 parity error detection on a lane */ (rOMIDLFIR, bit(6)) ? defaultMaskedError; /** OMIDLFIR[7] - * DL0 timeout + * OMI-DL0 retrained due to no forward progress */ - (rOMIDLFIR, bit(7)) ? defaultMaskedError; + (rOMIDLFIR, bit(7)) ? dl0_omi_bus_th_32perDay; /** OMIDLFIR[8] - * DL0 remote retrain + * OMI-DL0 remote side initiated a retrain */ (rOMIDLFIR, bit(8)) ? defaultMaskedError; /** OMIDLFIR[9] - * DL0 error retrain + * OMI-DL0 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(9)) ? defaultMaskedError; + (rOMIDLFIR, bit(9)) ? dl0_omi_bus_th_32perDay; /** OMIDLFIR[10] - * DL0 EDPL retrain + * OMI-DL0 threshold reached */ - (rOMIDLFIR, bit(10)) ? 
defaultMaskedError; + (rOMIDLFIR, bit(10)) ? dl0_omi_bus_th_32perDay; /** OMIDLFIR[11] - * DL0 trained + * OMI-DL0 trained */ (rOMIDLFIR, bit(11)) ? defaultMaskedError; /** OMIDLFIR[12] - * DL0 endpoint bit 0 + * OMI-DL0 endpoint error bit 0 */ (rOMIDLFIR, bit(12)) ? defaultMaskedError; /** OMIDLFIR[13] - * DL0 endpoint bit 1 + * OMI-DL0 endpoint error bit 1 */ (rOMIDLFIR, bit(13)) ? defaultMaskedError; /** OMIDLFIR[14] - * DL0 endpoint bit 2 + * OMI-DL0 endpoint error bit 2 */ (rOMIDLFIR, bit(14)) ? defaultMaskedError; /** OMIDLFIR[15] - * DL0 endpoint bit 3 + * OMI-DL0 endpoint error bit 3 */ (rOMIDLFIR, bit(15)) ? defaultMaskedError; /** OMIDLFIR[16] - * DL0 endpoint bit 4 + * OMI-DL0 endpoint error bit 4 */ (rOMIDLFIR, bit(16)) ? defaultMaskedError; /** OMIDLFIR[17] - * DL0 endpoint bit 5 + * OMI-DL0 endpoint error bit 5 */ (rOMIDLFIR, bit(17)) ? defaultMaskedError; /** OMIDLFIR[18] - * DL0 endpoint bit 6 + * OMI-DL0 endpoint error bit 6 */ (rOMIDLFIR, bit(18)) ? defaultMaskedError; /** OMIDLFIR[19] - * DL0 endpoint bit 7 + * OMI-DL0 endpoint error bit 7 */ (rOMIDLFIR, bit(19)) ? defaultMaskedError; /** OMIDLFIR[20] - * DL1 fatal error + * OMI-DL1 fatal error */ - (rOMIDLFIR, bit(20)) ? defaultMaskedError; + (rOMIDLFIR, bit(20)) ? dl1_fatal_error; /** OMIDLFIR[21] - * DL1 data UE + * OMI-DL1 UE on data flit */ - (rOMIDLFIR, bit(21)) ? defaultMaskedError; + (rOMIDLFIR, bit(21)) ? dl1_omi_th_1; /** OMIDLFIR[22] - * DL1 flit CE + * OMI-DL1 CE on TL flit */ - (rOMIDLFIR, bit(22)) ? defaultMaskedError; + (rOMIDLFIR, bit(22)) ? dl1_omi_th_32perDay; /** OMIDLFIR[23] - * DL1 CRC error + * OMI-DL1 detected a CRC error */ (rOMIDLFIR, bit(23)) ? defaultMaskedError; /** OMIDLFIR[24] - * DL1 nack + * OMI-DL1 received a nack */ (rOMIDLFIR, bit(24)) ? defaultMaskedError; /** OMIDLFIR[25] - * DL1 X4 mode + * OMI-DL1 running in degraded mode */ - (rOMIDLFIR, bit(25)) ? defaultMaskedError; + (rOMIDLFIR, bit(25)) ? 
dl1_omi_bus_th_1; /** OMIDLFIR[26] - * DL1 EDPL + * OMI-DL1 parity error detection on a lane */ (rOMIDLFIR, bit(26)) ? defaultMaskedError; /** OMIDLFIR[27] - * DL1 timeout + * OMI-DL1 retrained due to no forward progress */ - (rOMIDLFIR, bit(27)) ? defaultMaskedError; + (rOMIDLFIR, bit(27)) ? dl1_omi_bus_th_32perDay; /** OMIDLFIR[28] - * DL1 remote retrain + * OMI-DL1 remote side initiated a retrain */ (rOMIDLFIR, bit(28)) ? defaultMaskedError; /** OMIDLFIR[29] - * DL1 error retrain + * OMI-DL1 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(29)) ? defaultMaskedError; + (rOMIDLFIR, bit(29)) ? dl1_omi_bus_th_32perDay; /** OMIDLFIR[30] - * DL1 EDPL retrain + * OMI-DL1 threshold reached */ - (rOMIDLFIR, bit(30)) ? defaultMaskedError; + (rOMIDLFIR, bit(30)) ? dl1_omi_bus_th_32perDay; /** OMIDLFIR[31] - * DL1 trained + * OMI-DL1 trained */ (rOMIDLFIR, bit(31)) ? defaultMaskedError; /** OMIDLFIR[32] - * DL1 endpoint bit 0 + * OMI-DL1 endpoint error bit 0 */ (rOMIDLFIR, bit(32)) ? defaultMaskedError; /** OMIDLFIR[33] - * DL1 endpoint bit 1 + * OMI-DL1 endpoint error bit 1 */ (rOMIDLFIR, bit(33)) ? defaultMaskedError; /** OMIDLFIR[34] - * DL1 endpoint bit 2 + * OMI-DL1 endpoint error bit 2 */ (rOMIDLFIR, bit(34)) ? defaultMaskedError; /** OMIDLFIR[35] - * DL1 endpoint bit 3 + * OMI-DL1 endpoint error bit 3 */ (rOMIDLFIR, bit(35)) ? defaultMaskedError; /** OMIDLFIR[36] - * DL1 endpoint bit 4 + * OMI-DL1 endpoint error bit 4 */ (rOMIDLFIR, bit(36)) ? defaultMaskedError; /** OMIDLFIR[37] - * DL1 endpoint bit 5 + * OMI-DL1 endpoint error bit 5 */ (rOMIDLFIR, bit(37)) ? defaultMaskedError; /** OMIDLFIR[38] - * DL1 endpoint bit 6 + * OMI-DL1 endpoint error bit 6 */ (rOMIDLFIR, bit(38)) ? defaultMaskedError; /** OMIDLFIR[39] - * DL1 endpoint bit 7 + * OMI-DL1 endpoint error bit 7 */ (rOMIDLFIR, bit(39)) ? defaultMaskedError; /** OMIDLFIR[40] - * DL2 fatal error + * OMI-DL2 fatal error */ - (rOMIDLFIR, bit(40)) ? 
defaultMaskedError; + (rOMIDLFIR, bit(40)) ? dl2_fatal_error; /** OMIDLFIR[41] - * DL2 data UE + * OMI-DL2 UE on data flit */ - (rOMIDLFIR, bit(41)) ? defaultMaskedError; + (rOMIDLFIR, bit(41)) ? dl2_omi_th_1; /** OMIDLFIR[42] - * DL2 flit CE + * OMI-DL2 CE on TL flit */ - (rOMIDLFIR, bit(42)) ? defaultMaskedError; + (rOMIDLFIR, bit(42)) ? dl2_omi_th_32perDay; /** OMIDLFIR[43] - * DL2 CRC error + * OMI-DL2 detected a CRC error */ (rOMIDLFIR, bit(43)) ? defaultMaskedError; /** OMIDLFIR[44] - * DL2 nack + * OMI-DL2 received a nack */ (rOMIDLFIR, bit(44)) ? defaultMaskedError; /** OMIDLFIR[45] - * DL2 X4 mode + * OMI-DL2 running in degraded mode */ - (rOMIDLFIR, bit(45)) ? defaultMaskedError; + (rOMIDLFIR, bit(45)) ? dl2_omi_bus_th_1; /** OMIDLFIR[46] - * DL2 EDPL + * OMI-DL2 parity error detection on a lane */ (rOMIDLFIR, bit(46)) ? defaultMaskedError; /** OMIDLFIR[47] - * DL2 timeout + * OMI-DL2 retrained due to no forward progress */ - (rOMIDLFIR, bit(47)) ? defaultMaskedError; + (rOMIDLFIR, bit(47)) ? dl2_omi_bus_th_32perDay; /** OMIDLFIR[48] - * DL2 remote retrain + * OMI-DL2 remote side initiated a retrain */ (rOMIDLFIR, bit(48)) ? defaultMaskedError; /** OMIDLFIR[49] - * DL2 error retrain + * OMI-DL2 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(49)) ? defaultMaskedError; + (rOMIDLFIR, bit(49)) ? dl2_omi_bus_th_32perDay; /** OMIDLFIR[50] - * DL2 EDPL retrain + * OMI-DL2 threshold reached */ - (rOMIDLFIR, bit(50)) ? defaultMaskedError; + (rOMIDLFIR, bit(50)) ? dl2_omi_bus_th_32perDay; /** OMIDLFIR[51] - * DL2 trained + * OMI-DL2 trained */ (rOMIDLFIR, bit(51)) ? defaultMaskedError; /** OMIDLFIR[52] - * DL2 endpoint bit 0 + * OMI-DL2 endpoint error bit 0 */ (rOMIDLFIR, bit(52)) ? defaultMaskedError; /** OMIDLFIR[53] - * DL2 endpoint bit 1 + * OMI-DL2 endpoint error bit 1 */ (rOMIDLFIR, bit(53)) ? defaultMaskedError; /** OMIDLFIR[54] - * DL2 endpoint bit 2 + * OMI-DL2 endpoint error bit 2 */ (rOMIDLFIR, bit(54)) ? 
defaultMaskedError; /** OMIDLFIR[55] - * DL2 endpoint bit 3 + * OMI-DL2 endpoint error bit 3 */ (rOMIDLFIR, bit(55)) ? defaultMaskedError; /** OMIDLFIR[56] - * DL2 endpoint bit 4 + * OMI-DL2 endpoint error bit 4 */ (rOMIDLFIR, bit(56)) ? defaultMaskedError; /** OMIDLFIR[57] - * DL2 endpoint bit 5 + * OMI-DL2 endpoint error bit 5 */ (rOMIDLFIR, bit(57)) ? defaultMaskedError; /** OMIDLFIR[58] - * DL2 endpoint bit 6 + * OMI-DL2 endpoint error bit 6 */ (rOMIDLFIR, bit(58)) ? defaultMaskedError; /** OMIDLFIR[59] - * DL2 endpoint bit 7 + * OMI-DL2 endpoint error bit 7 */ (rOMIDLFIR, bit(59)) ? defaultMaskedError; @@ -667,6 +669,21 @@ group gOMIDLFIR */ (rOMIDLFIR, bit(60)) ? defaultMaskedError; + /** OMIDLFIR[61] + * reserved + */ + (rOMIDLFIR, bit(61)) ? defaultMaskedError; + + /** OMIDLFIR[62] + * LFIR internal parity error + */ + (rOMIDLFIR, bit(62)) ? defaultMaskedError; + + /** OMIDLFIR[63] + * SCOM Satellite Error + */ + (rOMIDLFIR, bit(63)) ? defaultMaskedError; + }; ############################################################################## diff --git a/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule b/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule index ecb6626a8..dbf563b47 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_omic_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -24,6 +24,136 @@ # IBM_PROLOG_END_TAG ################################################################################ +# OMIC Actions # +################################################################################ + +actionclass dl0_omi +{ + callout(connected(TYPE_OMI,0), MRU_MED); +}; + +actionclass dl1_omi +{ + callout(connected(TYPE_OMI,1), MRU_MED); +}; + +actionclass dl2_omi +{ + callout(connected(TYPE_OMI,2), MRU_MED); +}; + +actionclass dl0_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_0"); +}; + +actionclass dl1_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_1"); +}; + +actionclass dl2_omi_bus +{ + funccall("omiParentCalloutBusInterfacePlugin_2"); +}; + +/** OMI-DL0 Fatal Error */ +actionclass dl0_fatal_error +{ + try( funccall("DlFatalError_0"), dl0_omi_bus ); + threshold1; +}; + +/** OMI-DL1 Fatal Error */ +actionclass dl1_fatal_error +{ + try( funccall("DlFatalError_1"), dl1_omi_bus ); + threshold1; +}; + +/** OMI-DL2 Fatal Error */ +actionclass dl2_fatal_error +{ + try( funccall("DlFatalError_2"), dl2_omi_bus ); + threshold1; +}; + +actionclass dl0_omi_th_1 +{ + dl0_omi; + threshold1; +}; + +actionclass dl1_omi_th_1 +{ + dl1_omi; + threshold1; +}; + +actionclass dl2_omi_th_1 +{ + dl2_omi; + threshold1; +}; + +actionclass dl0_omi_th_32perDay +{ + dl0_omi; + threshold32pday; +}; + +actionclass dl1_omi_th_32perDay +{ + dl1_omi; + threshold32pday; +}; + +actionclass dl2_omi_th_32perDay +{ + dl2_omi; + threshold32pday; +}; + +actionclass dl0_omi_bus_th_1 +{ + dl0_omi_bus; + threshold1; +}; + +actionclass dl1_omi_bus_th_1 +{ + dl1_omi_bus; + threshold1; +}; + +actionclass dl2_omi_bus_th_1 +{ + dl2_omi_bus; + threshold1; +}; + +/** OMI-DL0 bus interface callout, threshold of 32 per day */ +actionclass dl0_omi_bus_th_32perDay +{ + dl0_omi_bus; + threshold32pday; +}; + +/** OMI-DL1 bus interface callout, threshold of 32 per day */ +actionclass dl1_omi_bus_th_32perDay +{ + dl1_omi_bus; + threshold32pday; +}; + +/** OMI-DL2 bus interface callout, threshold of 32 per day */ +actionclass dl2_omi_bus_th_32perDay +{ + dl2_omi_bus; + threshold32pday; +}; +
+################################################################################ # Analyze groups ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule b/src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule new file mode 100644 index 000000000..e698652a6 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule @@ -0,0 +1,62 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: src/usr/diag/prdf/common/plat/axone/axone_omic_regs.rule $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# IBM_PROLOG_END_TAG + +############################################################################### +# Additional registers for omic, not defined in XML +############################################################################### + + + ########################################################################### + # P9 Axone target OMIDLFIR + ########################################################################### + + register OMIDLFIR_MASK_OR + { + name "P9 OMIC target OMIDLFIR MASK atomic OR"; + scomaddr 0x07013345; + capture group never; + access write_only; + }; + + register DL0_ERROR_HOLD + { + name "P9 Axone target DL0 Error Hold Register"; + scomaddr 0x07013353; + capture group default; + }; + + register DL1_ERROR_HOLD + { + name "P9 Axone target DL1 Error Hold Register"; + scomaddr 0x07013363; + capture group default; + }; + + register DL2_ERROR_HOLD + { + name "P9 Axone target DL2 Error Hold Register"; + scomaddr 0x07013373; + capture group default; + }; diff --git a/src/usr/diag/prdf/common/plat/axone/axone_phb.rule b/src/usr/diag/prdf/common/plat/axone/axone_phb.rule index 844739ee2..1c5bb566d 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_phb.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_phb.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -212,7 +212,7 @@ group gPHBNFIR /** PHBNFIR[0] * BAR Parity Error */ - (rPHBNFIR, bit(0)) ? self_th_1; + (rPHBNFIR, bit(0)) ? parent_proc_th_1; /** PHBNFIR[1] * Parity Errors on Registers besides BAR @@ -252,12 +252,12 @@ group gPHBNFIR /** PHBNFIR[8] * Register Array Parity Error */ - (rPHBNFIR, bit(8)) ? self_th_1; + (rPHBNFIR, bit(8)) ? parent_proc_th_1; /** PHBNFIR[9] * Power Bus Interface Parity Error */ - (rPHBNFIR, bit(9)) ? self_th_1; + (rPHBNFIR, bit(9)) ? 
parent_proc_th_1; /** PHBNFIR[10] * Power Bus Data Hang @@ -297,7 +297,7 @@ group gPHBNFIR /** PHBNFIR[17] * Hardware Error */ - (rPHBNFIR, bit(17)) ? self_th_1; + (rPHBNFIR, bit(17)) ? parent_proc_th_1; /** PHBNFIR[18] * Unsolicited Power Bus Data diff --git a/src/usr/diag/prdf/common/plat/axone/axone_proc.rule b/src/usr/diag/prdf/common/plat/axone/axone_proc.rule index c37c103be..b936106e2 100644 --- a/src/usr/diag/prdf/common/plat/axone/axone_proc.rule +++ b/src/usr/diag/prdf/common/plat/axone/axone_proc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -950,42 +950,6 @@ chip axone_proc }; ############################################################################ - # P9 chip ENHCAFIR - ############################################################################ - - register ENHCAFIR - { - name "P9 chip ENHCAFIR"; - scomaddr 0x05012940; - reset (&, 0x05012941); - mask (|, 0x05012945); - capture group default; - }; - - register ENHCAFIR_MASK - { - name "P9 chip ENHCAFIR MASK"; - scomaddr 0x05012943; - capture group default; - }; - - register ENHCAFIR_ACT0 - { - name "P9 chip ENHCAFIR ACT0"; - scomaddr 0x05012946; - capture group default; - capture req nonzero("ENHCAFIR"); - }; - - register ENHCAFIR_ACT1 - { - name "P9 chip ENHCAFIR ACT1"; - scomaddr 0x05012947; - capture group default; - capture req nonzero("ENHCAFIR"); - }; - - ############################################################################ # P9 chip PBAMFIR ############################################################################ @@ -2758,7 +2722,7 @@ group gNXCQFIR /** NXCQFIR[19] * Uncorrectable error on ERAT arrays */ - (rNXCQFIR, bit(19)) ? nx_th_32perDay; + (rNXCQFIR, bit(19)) ? nx_th_1; /** NXCQFIR[20] * SUE on ERAT arrays @@ -4077,14 +4041,14 @@ group gN3_CHIPLET_FIR (rN3_CHIPLET_FIR, bit(14)) ? 
analyzePBPPEFIR; /** N3_CHIPLET_FIR[15] - * Attention from PBIOEFIR + * Attention from PBIOOFIR */ - (rN3_CHIPLET_FIR, bit(15)) ? analyzePBIOEFIR; + (rN3_CHIPLET_FIR, bit(15)) ? analyzePBIOOFIR; /** N3_CHIPLET_FIR[16] - * Attention from PBIOOFIR + * Attention from NPU0FIR 1 */ - (rN3_CHIPLET_FIR, bit(16)) ? analyzePBIOOFIR; + (rN3_CHIPLET_FIR, bit(16)) ? analyzeConnectedNPU1; /** N3_CHIPLET_FIR[17] * Attention from INTCQFIR @@ -4106,15 +4070,10 @@ group gN3_CHIPLET_FIR */ (rN3_CHIPLET_FIR, bit(20)) ? analyzePBAMFIR; - /** N3_CHIPLET_FIR[21] - * Attention from NPU0FIR 1 - */ - (rN3_CHIPLET_FIR, bit(21)) ? analyzeConnectedNPU1; - /** N3_CHIPLET_FIR[22] - * Attention from ENHCAFIR + * Attention from PBIOEFIR */ - (rN3_CHIPLET_FIR, bit(22)) ? analyzeENHCAFIR; + (rN3_CHIPLET_FIR, bit(22)) ? analyzePBIOEFIR; /** N3_CHIPLET_FIR[23] * Attention from NPU2FIR 0 @@ -5145,144 +5104,6 @@ group gPSIHBFIR }; ################################################################################ -# P9 chip ENHCAFIR -################################################################################ - -rule rENHCAFIR -{ - CHECK_STOP: - ENHCAFIR & ~ENHCAFIR_MASK & ~ENHCAFIR_ACT0 & ~ENHCAFIR_ACT1; - RECOVERABLE: - ENHCAFIR & ~ENHCAFIR_MASK & ~ENHCAFIR_ACT0 & ENHCAFIR_ACT1; -}; - -group gENHCAFIR - filter singlebit, - cs_root_cause -{ - /** ENHCAFIR[0] - * PB0 data UE - */ - (rENHCAFIR, bit(0)) ? defaultMaskedError; - - /** ENHCAFIR[1] - * PB0 data SUE - */ - (rENHCAFIR, bit(1)) ? defaultMaskedError; - - /** ENHCAFIR[2] - * PB0 data ue - */ - (rENHCAFIR, bit(2)) ? defaultMaskedError; - - /** ENHCAFIR[3] - * spare - */ - (rENHCAFIR, bit(3)) ? defaultMaskedError; - - /** ENHCAFIR[4] - * Castout Drop Counter Full - */ - (rENHCAFIR, bit(4)) ? defaultMaskedError; - - /** ENHCAFIR[5] - * Data Hang Detect - */ - (rENHCAFIR, bit(5)) ? defaultMaskedError; - - /** ENHCAFIR[6] - * Unexpected data or cresp - */ - (rENHCAFIR, bit(6)) ? 
defaultMaskedError; - - /** ENHCAFIR[7] - * Internal Error - */ - (rENHCAFIR, bit(7)) ? defaultMaskedError; - - /** ENHCAFIR[8] - * ADU checkstop error from power bus data - */ - (rENHCAFIR, bit(8)) ? defaultMaskedError; - - /** ENHCAFIR[9] - * ADU checkstop error from alter display - */ - (rENHCAFIR, bit(9)) ? defaultMaskedError; - - /** ENHCAFIR[10] - * ADU checkstop error from xsco m - */ - (rENHCAFIR, bit(10)) ? defaultMaskedError; - - /** ENHCAFIR[11] - * ADU checkstop from power bus cmd - */ - (rENHCAFIR, bit(11)) ? defaultMaskedError; - - /** ENHCAFIR[12] - * ADU checkstop error from power bus send - */ - (rENHCAFIR, bit(12)) ? defaultMaskedError; - - /** ENHCAFIR[13] - * ADU checkstop from power bus receive - */ - (rENHCAFIR, bit(13)) ? defaultMaskedError; - - /** ENHCAFIR[14] - * ADU recoverable error from pb data - */ - (rENHCAFIR, bit(14)) ? defaultMaskedError; - - /** ENHCAFIR[15] - * ADU recoverable error from alter display - */ - (rENHCAFIR, bit(15)) ? defaultMaskedError; - - /** ENHCAFIR[16] - * ADU recoverable error from xscom - */ - (rENHCAFIR, bit(16)) ? defaultMaskedError; - - /** ENHCAFIR[17] - * ADU recoverable from power bus cmd - */ - (rENHCAFIR, bit(17)) ? defaultMaskedError; - - /** ENHCAFIR[18] - * ADU recoverable error from pb send - */ - (rENHCAFIR, bit(18)) ? defaultMaskedError; - - /** ENHCAFIR[19] - * ADU recoverable error from pb receive - */ - (rENHCAFIR, bit(19)) ? defaultMaskedError; - - /** ENHCAFIR[20] - * NHTM scom error - */ - (rENHCAFIR, bit(20)) ? defaultMaskedError; - - /** ENHCAFIR[21] - * spare - */ - (rENHCAFIR, bit(21)) ? defaultMaskedError; - - /** ENHCAFIR[22] - * scom error - */ - (rENHCAFIR, bit(22)) ? defaultMaskedError; - - /** ENHCAFIR[23] - * scom error - */ - (rENHCAFIR, bit(23)) ? 
defaultMaskedError; - -}; - -################################################################################ # P9 chip PBAMFIR ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C b/src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C new file mode 100644 index 000000000..804418717 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C @@ -0,0 +1,142 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/axone/prdfMccPlugins.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfMemUtils.H> +#include <prdfPlatServices.H> +#include <prdfMemExtraSig.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace axone_mcc +{ + +//############################################################################## +// +// Special plugins +// +//############################################################################## + +/** + * @brief Analysis code that is called before the main analyze() function. + * @param i_chip An MCC chip. + * @param io_sc The step code data struct. + * @param o_analyzed Set from MemUtils::analyzeChnlFail(); true if analysis is done on this chip, false otherwise. + * @return SUCCESS (this implementation has no failure path). + */ +int32_t PreAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + bool & o_analyzed ) +{ + // Check for a channel failure before analyzing this chip. + o_analyzed = MemUtils::analyzeChnlFail<TYPE_MCC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_mcc, PreAnalysis ); + +/** + * @brief Plugin function called after analysis is complete but before PRD + * exits. + * @param i_chip An MCC chip. + * @param io_sc The step code data struct. + * @note This is especially useful for any analysis that still needs to be + * done after the framework clears the FIR bits that were at attention. + * @return SUCCESS. + */ +int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + // If there was a channel failure some cleanup is required to ensure + // there are no more attentions from this channel.
+ MemUtils::cleanupChnlFail<TYPE_MCC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_mcc, PostAnalysis ); + +//############################################################################## +// +// DSTLFIR +// +//############################################################################## + +/** + * @brief Plugin function called to avoid analyzing to a checkstop on an OCMB. + * @param i_chip An MCC chip. + * @param io_sc The step code data struct. + * @param i_pos Position of the OMI/OCMB relative to the MCC. + * @return SUCCESS if CONFIG_ENABLE_CHECKSTOP_ANALYSIS is defined and the primary attn is CHECK_STOP, else PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t checkOcmb( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + uint8_t i_pos ) +{ + int32_t rc = PRD_SCAN_COMM_REGISTER_ZERO; + + #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS + // We do not have support for the OCMB in the checkstop analysis path. + // As such, we will simply indicate there is an attention from the OCMB and + // add second level support and both sides of the bus as callouts. NOTE(review): neither omi nor ocmb is null-checked before use below (ocmb is dereferenced); confirm getConnectedChild() cannot return null here.
+ if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) + { + TargetHandle_t omi = getConnectedChild( i_chip->getTrgt(), TYPE_OMI, + i_pos ); + ExtensibleChip * ocmb = getConnectedChild( i_chip, TYPE_OCMB_CHIP, + i_pos ); + + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->SetCallout( omi, MRU_LOW, NO_GARD ); + io_sc.service_data->SetCallout( ocmb->getTrgt(), MRU_LOW, NO_GARD ); + + rc = SUCCESS; + } + #endif + + return rc; +} + +#define CHECK_OCMB_PLUGIN( POS ) \ +int32_t checkOcmb_##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + return checkOcmb( i_chip, io_sc, POS ); \ +} \ +PRDF_PLUGIN_DEFINE( axone_mcc, checkOcmb_##POS ); + +CHECK_OCMB_PLUGIN( 0 ); +CHECK_OCMB_PLUGIN( 1 ); + +} // end namespace axone_mcc + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C b/src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C new file mode 100644 index 000000000..f6ea182b9 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C @@ -0,0 +1,173 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/axone/prdfOmicPlugins.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019,2020 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied.
See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfMemUtils.H> +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace axone_omic +{ + +//############################################################################## +// +// Special plugins +// +//############################################################################## + +/** + * @brief Analysis code that is called before the main analyze() function. + * @param i_chip An OMIC chip. + * @param io_sc The step code data struct. + * @param o_analyzed Set from MemUtils::analyzeChnlFail(); true if analysis is done on this chip, false otherwise. + * @return SUCCESS (this implementation has no failure path). + */ +int32_t PreAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + bool & o_analyzed ) +{ + // Check for a channel failure before analyzing this chip. + o_analyzed = MemUtils::analyzeChnlFail<TYPE_OMIC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_omic, PreAnalysis ); + +/** + * @brief Plugin function called after analysis is complete but before PRD + * exits. + * @param i_chip An OMIC chip. + * @param io_sc The step code data struct. + * @note This is especially useful for any analysis that still needs to be + * done after the framework clears the FIR bits that were at attention. + * @return SUCCESS. + */ +int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + // If there was a channel failure some cleanup is required to ensure + // there are no more attentions from this channel.
+ MemUtils::cleanupChnlFail<TYPE_OMIC>( i_chip, io_sc ); + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( axone_omic, PostAnalysis ); + +//############################################################################## +// +// OMIDLFIR +// +//############################################################################## + +/** + * @brief OMIDLFIR[0|20|40] - OMI-DL Fatal Error + * @param i_chip An OMIC chip. + * @param io_sc The step code data struct. + * @param i_dl The DL position relative to the OMIC; selects the DL<i_dl>_ERROR_HOLD register and the connected OMI. + * @return PRD_SCAN_COMM_REGISTER_ZERO (so the rule code makes the bus callout) when none of DL#_ERROR_HOLD[53,55,57:60,62,63] is set but one of [54,56,61] is; SUCCESS otherwise, including when the register read fails. + */ +int32_t DlFatalError( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc, + uint8_t i_dl ) +{ + #define PRDF_FUNC "[axone_omic::DlFatalError] " + + int32_t rc = SUCCESS; + + do + { + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of these errors.
+ io_sc.service_data->setPrimaryAttnType( UNIT_CS ); + + char reg[64]; + sprintf( reg, "DL%d_ERROR_HOLD", i_dl ); + + // Check DL#_ERROR_HOLD[52:63] to determine callout. NOTE(review): only bits 53-63 are tested below; bit 52 is never checked. + SCAN_COMM_REGISTER_CLASS * dl_error_hold = i_chip->getRegister( reg ); + + if ( SUCCESS != dl_error_hold->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() Failed on DL%d_ERROR_HOLD: " + "i_chip=0x%08x", i_dl, i_chip->getHuid() ); + break; + } + + if ( dl_error_hold->IsBitSet(53) || + dl_error_hold->IsBitSet(55) || + dl_error_hold->IsBitSet(57) || + dl_error_hold->IsBitSet(58) || + dl_error_hold->IsBitSet(59) || + dl_error_hold->IsBitSet(60) || + dl_error_hold->IsBitSet(62) || + dl_error_hold->IsBitSet(63) ) + { + // Get and callout the OMI target + TargetHandle_t omi = getConnectedChild( i_chip->getTrgt(), TYPE_OMI, + i_dl ); + io_sc.service_data->SetCallout( omi ); + } + else if ( dl_error_hold->IsBitSet(54) || + dl_error_hold->IsBitSet(56) || + dl_error_hold->IsBitSet(61) ) + { + // callout the OMI target, the OMI bus, and the OCMB + // Return PRD_SCAN_COMM_REGISTER_ZERO so the rule code makes + // the appropriate callout.
+ rc = PRD_SCAN_COMM_REGISTER_ZERO; + } + + }while(0); + + return rc; + + #undef PRDF_FUNC +} + +#define DL_FATAL_ERROR_PLUGIN( POS ) \ +int32_t DlFatalError_##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + return DlFatalError( i_chip, io_sc, POS ); \ +} \ +PRDF_PLUGIN_DEFINE( axone_omic, DlFatalError_##POS ); + +DL_FATAL_ERROR_PLUGIN( 0 ); +DL_FATAL_ERROR_PLUGIN( 1 ); +DL_FATAL_ERROR_PLUGIN( 2 ); + +} // end namespace axone_omic + +} // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk b/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk index ea76f9121..24acb5bb6 100644 --- a/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk +++ b/src/usr/diag/prdf/common/plat/axone/prdf_plat_axone.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -37,5 +37,7 @@ prd_incpath += ${PRD_SRC_PATH}/common/plat/axone # Object files common to both FSP and Hostboot ################################################################################ -# plat/cumulus/ (rule plugin related) +# plat/axone/ (rule plugin related) +prd_rule_plugin += prdfMccPlugins.o +prd_rule_plugin += prdfOmicPlugins.o diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule index 50a0170c2..027a0c08c 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_mc_regs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp.
# # @@ -50,3 +50,20 @@ capture group default; }; + ############################################################################ + # PCB Slave Error Regs + ############################################################################ + + register MC_ERROR_REG + { + name "MC PCB Slave error reg"; + scomaddr 0x070F001F; + capture group PllFIRs; + }; + + register MC_CONFIG_REG + { + name "MC PCB Slave config reg"; + scomaddr 0x070F001E; + capture group PllFIRs; + }; diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule index 8c950bbc7..7275e26a3 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_obus.rule @@ -469,12 +469,12 @@ group gIOOLFIR /** IOOLFIR[8] * link0 nak received */ - (rIOOLFIR, bit(8)) ? defaultMaskedError; + (rIOOLFIR, bit(8)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[9] * link1 nak received */ - (rIOOLFIR, bit(9)) ? defaultMaskedError; + (rIOOLFIR, bit(9)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[10] * link0 replay buffer full @@ -499,22 +499,22 @@ group gIOOLFIR /** IOOLFIR[14] * link0 sl ecc correctable */ - (rIOOLFIR, bit(14)) ? threshold_and_mask_self; + (rIOOLFIR, bit(14)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[15] * link1 sl ecc correctable */ - (rIOOLFIR, bit(15)) ? threshold_and_mask_self; + (rIOOLFIR, bit(15)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[16] * link0 sl ecc ue */ - (rIOOLFIR, bit(16)) ? threshold_and_mask_self; + (rIOOLFIR, bit(16)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[17] * link1 sl ecc ue */ - (rIOOLFIR, bit(17)) ? threshold_and_mask_self; + (rIOOLFIR, bit(17)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[18] * link0 retrain threshold @@ -597,12 +597,12 @@ group gIOOLFIR (rIOOLFIR, bit(33)) ? defaultMaskedError; /** IOOLFIR[34] - * link0 num replay + * link0 num replay or no forward progress */ (rIOOLFIR, bit(34)) ? 
defaultMaskedError; /** IOOLFIR[35] - * link1 num replay + * link1 num replay or no forward progress */ (rIOOLFIR, bit(35)) ? defaultMaskedError; @@ -619,12 +619,12 @@ group gIOOLFIR /** IOOLFIR[38] * link0 prbs select error */ - (rIOOLFIR, bit(38)) ? threshold_and_mask_self; + (rIOOLFIR, bit(38)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[39] * link1 prbs select error */ - (rIOOLFIR, bit(39)) ? threshold_and_mask_self; + (rIOOLFIR, bit(39)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[40] * link0 tcomplete bad @@ -639,102 +639,102 @@ group gIOOLFIR /** IOOLFIR[42] * link0 no spare lane available */ - (rIOOLFIR, bit(42)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(42)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[43] * link1 no spare lane available */ - (rIOOLFIR, bit(43)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(43)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[44] - * link0 spare done + * link0 spare done or degraded mode */ - (rIOOLFIR, bit(44)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(44)) ? spare_lane_degraded_mode_L0; /** IOOLFIR[45] - * link1 spare done + * link1 spare done or degraded mode */ - (rIOOLFIR, bit(45)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(45)) ? spare_lane_degraded_mode_L1; /** IOOLFIR[46] * link0 too many crc errors */ - (rIOOLFIR, bit(46)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(46)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[47] * link1 too many crc errors */ - (rIOOLFIR, bit(47)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(47)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[48] - * link0 npu error + * link0 npu error or orx otx dlx errors */ (rIOOLFIR, bit(48)) ? threshold_and_mask_self; /** IOOLFIR[49] - * link1 npu error + * link1 npu error or orx otx dlx errors */ (rIOOLFIR, bit(49)) ? threshold_and_mask_self; /** IOOLFIR[50] * linkx npu error */ - (rIOOLFIR, bit(50)) ? threshold_and_mask_self; + (rIOOLFIR, bit(50)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[51] * osc switch */ - (rIOOLFIR, bit(51)) ? 
threshold_and_mask_self; + (rIOOLFIR, bit(51)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[52] * link0 correctable array error */ - (rIOOLFIR, bit(52)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(52)) ? self_th_32perDay; /** IOOLFIR[53] * link1 correctable array error */ - (rIOOLFIR, bit(53)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(53)) ? self_th_32perDay; /** IOOLFIR[54] * link0 uncorrectable array error */ - (rIOOLFIR, bit(54)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(54)) ? self_th_1; /** IOOLFIR[55] * link1 uncorrectable array error */ - (rIOOLFIR, bit(55)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(55)) ? self_th_1; /** IOOLFIR[56] * link0 training failed */ - (rIOOLFIR, bit(56)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(56)) ? training_failure_L0; /** IOOLFIR[57] * link1 training failed */ - (rIOOLFIR, bit(57)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(57)) ? training_failure_L1; /** IOOLFIR[58] * link0 unrecoverable error */ - (rIOOLFIR, bit(58)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(58)) ? unrecoverable_error_L0; /** IOOLFIR[59] * link1 unrecoverable error */ - (rIOOLFIR, bit(59)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(59)) ? unrecoverable_error_L1; /** IOOLFIR[60] * link0 internal error */ - (rIOOLFIR, bit(60)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(60)) ? internal_error_L0; /** IOOLFIR[61] * link1 internal error */ - (rIOOLFIR, bit(61)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(61)) ? internal_error_L1; /** IOOLFIR[62] * fir scom err dup diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule index 9c8dcce38..88c917458 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_phb.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -212,7 +212,7 @@ group gPHBNFIR /** PHBNFIR[0] * BAR Parity Error */ - (rPHBNFIR, bit(0)) ? self_th_1; + (rPHBNFIR, bit(0)) ? parent_proc_th_1; /** PHBNFIR[1] * Parity Errors on Registers besides BAR @@ -252,12 +252,12 @@ group gPHBNFIR /** PHBNFIR[8] * Register Array Parity Error */ - (rPHBNFIR, bit(8)) ? self_th_1; + (rPHBNFIR, bit(8)) ? parent_proc_th_1; /** PHBNFIR[9] * Power Bus Interface Parity Error */ - (rPHBNFIR, bit(9)) ? self_th_1; + (rPHBNFIR, bit(9)) ? parent_proc_th_1; /** PHBNFIR[10] * Power Bus Data Hang @@ -297,7 +297,7 @@ group gPHBNFIR /** PHBNFIR[17] * Hardware Error */ - (rPHBNFIR, bit(17)) ? self_th_1; + (rPHBNFIR, bit(17)) ? parent_proc_th_1; /** PHBNFIR[18] * Unsolicited Power Bus Data diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule index 187cd2a44..ae8e6bb80 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -2893,7 +2893,7 @@ group gNXCQFIR /** NXCQFIR[19] * Uncorrectable error on ERAT arrays */ - (rNXCQFIR, bit(19)) ? nx_th_32perDay; + (rNXCQFIR, bit(19)) ? nx_th_1; /** NXCQFIR[20] * SUE on ERAT arrays diff --git a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule index 26d62e95f..91298d653 100644 --- a/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule +++ b/src/usr/diag/prdf/common/plat/cumulus/cumulus_proc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -23,6 +23,12 @@ # # IBM_PROLOG_END_TAG +################################################################################ +# Analyze +################################################################################ + +actionclass analyzeENHCAFIR { analyze(gENHCAFIR); }; + ############################################################################### # Analyze connected ############################################################################### diff --git a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule index 1abd08c96..c1e5c15a8 100644 --- a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule +++ b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -44,82 +44,82 @@ chip explorer_ocmb ############################################################################# ############################################################################ - # MB Chiplet FIR + # OCMB Chiplet FIR ############################################################################ - register MB_CHIPLET_CS_FIR + register OCMB_CHIPLET_CS_FIR { - name "MB Chiplet Checkstop FIR"; + name "OCMB Chiplet Checkstop FIR"; scomaddr 0x08040000; capture group default; }; - register MB_CHIPLET_RE_FIR + register OCMB_CHIPLET_RE_FIR { - name "MB Chiplet Recoverable FIR"; + name "OCMB Chiplet Recoverable FIR"; scomaddr 0x08040001; capture group default; }; - register MB_CHIPLET_FIR_MASK + register OCMB_CHIPLET_FIR_MASK { - name "MB Chiplet FIR MASK"; + name "OCMB Chiplet FIR MASK"; scomaddr 0x08040002; capture group default; }; ############################################################################ - # MB Chiplet Special Attention FIR + # OCMB Chiplet Special Attention FIR 
############################################################################ - register MB_CHIPLET_SPA_FIR + register OCMB_CHIPLET_SPA_FIR { - name "MB Chiplet Special Attention FIR"; + name "OCMB Chiplet Special Attention FIR"; scomaddr 0x08040004; capture group default; }; - register MB_CHIPLET_SPA_FIR_MASK + register OCMB_CHIPLET_SPA_FIR_MASK { - name "MB Chiplet Special Attention FIR MASK"; + name "OCMB Chiplet Special Attention FIR MASK"; scomaddr 0x08040007; capture group default; }; ############################################################################ - # Explorer chip MB_LFIR + # Explorer chip OCMB_LFIR ############################################################################ - register MB_LFIR + register OCMB_LFIR { - name "Explorer chip MB_LFIR"; + name "Explorer chip OCMB_LFIR"; scomaddr 0x0804000a; reset (&, 0x0804000b); mask (|, 0x0804000f); capture group default; }; - register MB_LFIR_MASK + register OCMB_LFIR_MASK { - name "Explorer chip MB_LFIR MASK"; + name "Explorer chip OCMB_LFIR MASK"; scomaddr 0x0804000d; capture group default; }; - register MB_LFIR_ACT0 + register OCMB_LFIR_ACT0 { - name "Explorer chip MB_LFIR ACT0"; + name "Explorer chip OCMB_LFIR ACT0"; scomaddr 0x08040010; capture group default; - capture req nonzero("MB_LFIR"); + capture req nonzero("OCMB_LFIR"); }; - register MB_LFIR_ACT1 + register OCMB_LFIR_ACT1 { - name "Explorer chip MB_LFIR ACT1"; + name "Explorer chip OCMB_LFIR ACT1"; scomaddr 0x08040011; capture group default; - capture req nonzero("MB_LFIR"); + capture req nonzero("OCMB_LFIR"); }; ############################################################################ @@ -355,174 +355,261 @@ chip explorer_ocmb ############################################################################## ################################################################################ -# MB Chiplet FIR +# OCMB Chiplet FIR ################################################################################ -rule rMB_CHIPLET_FIR +rule 
rOCMB_CHIPLET_FIR { UNIT_CS: - MB_CHIPLET_CS_FIR & ~MB_CHIPLET_FIR_MASK & `1fffffffffffffff`; + OCMB_CHIPLET_CS_FIR & ~OCMB_CHIPLET_FIR_MASK & `1fffffffffffffff`; RECOVERABLE: - (MB_CHIPLET_RE_FIR >> 2) & ~MB_CHIPLET_FIR_MASK & `1fffffffffffffff`; + (OCMB_CHIPLET_RE_FIR >> 2) & ~OCMB_CHIPLET_FIR_MASK & `1fffffffffffffff`; }; -group gMB_CHIPLET_FIR attntype CHECK_STOP, RECOVERABLE +# NOTE: RDFFIR[14|34] are possible side effects of OCMB_LFIR[38], as such, +# OCMB_LFIR must be analyzed first for correct handling. If changes are +# made so the RDFFIR is analyzed first, additional changes to the handling +# of those bits will be required. +group gOCMB_CHIPLET_FIR attntype UNIT_CS, RECOVERABLE filter singlebit { - /** MB_CHIPLET_FIR[3] - * Attention from MB_LFIR + /** OCMB_CHIPLET_FIR[3] + * Attention from OCMB_LFIR */ - (rMB_CHIPLET_FIR, bit(3)) ? analyzeMB_LFIR; + (rOCMB_CHIPLET_FIR, bit(3)) ? analyzeOCMB_LFIR; - /** MB_CHIPLET_FIR[4] + /** OCMB_CHIPLET_FIR[4] * Attention from MMIOFIR */ - (rMB_CHIPLET_FIR, bit(4)) ? analyzeMMIOFIR; + (rOCMB_CHIPLET_FIR, bit(4)) ? analyzeMMIOFIR; - /** MB_CHIPLET_FIR[7] + /** OCMB_CHIPLET_FIR[7] * Attention from SRQFIR */ - (rMB_CHIPLET_FIR, bit(7)) ? analyzeSRQFIR; + (rOCMB_CHIPLET_FIR, bit(7)) ? analyzeSRQFIR; - /** MB_CHIPLET_FIR[8] + /** OCMB_CHIPLET_FIR[8] * Attention from MCBISTFIR */ - (rMB_CHIPLET_FIR, bit(8)) ? analyzeMCBISTFIR; + (rOCMB_CHIPLET_FIR, bit(8)) ? analyzeMCBISTFIR; - /** MB_CHIPLET_FIR[9] + /** OCMB_CHIPLET_FIR[9] * Attention from RDFFIR */ - (rMB_CHIPLET_FIR, bit(9)) ? analyzeRDFFIR; + (rOCMB_CHIPLET_FIR, bit(9)) ? analyzeRDFFIR; - /** MB_CHIPLET_FIR[11] + /** OCMB_CHIPLET_FIR[11] * Attention from TLXFIR */ - (rMB_CHIPLET_FIR, bit(11)) ? analyzeTLXFIR; + (rOCMB_CHIPLET_FIR, bit(11)) ? analyzeTLXFIR; - /** MB_CHIPLET_FIR[12] + /** OCMB_CHIPLET_FIR[12] * Attention from OMIDLFIR */ - (rMB_CHIPLET_FIR, bit(12)) ? analyzeOMIDLFIR; + (rOCMB_CHIPLET_FIR, bit(12)) ? 
analyzeOMIDLFIR; }; ################################################################################ -# MB Chiplet Special Attention FIR +# OCMB Chiplet Special Attention FIR ################################################################################ -rule rMB_CHIPLET_SPA_FIR +rule rOCMB_CHIPLET_SPA_FIR { HOST_ATTN: - MB_CHIPLET_SPA_FIR & ~MB_CHIPLET_SPA_FIR_MASK; + OCMB_CHIPLET_SPA_FIR & ~OCMB_CHIPLET_SPA_FIR_MASK; }; -group gMB_CHIPLET_SPA_FIR attntype HOST_ATTN +group gOCMB_CHIPLET_SPA_FIR attntype HOST_ATTN filter singlebit { - /** MB_CHIPLET_SPA_FIR[1] + /** OCMB_CHIPLET_SPA_FIR[1] * Attention from MMIOFIR */ - (rMB_CHIPLET_SPA_FIR, bit(1)) ? analyzeMMIOFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(1)) ? analyzeMMIOFIR; - /** MB_CHIPLET_SPA_FIR[4] + /** OCMB_CHIPLET_SPA_FIR[4] * Attention from SRQFIR */ - (rMB_CHIPLET_SPA_FIR, bit(4)) ? analyzeSRQFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(4)) ? analyzeSRQFIR; - /** MB_CHIPLET_SPA_FIR[5] + /** OCMB_CHIPLET_SPA_FIR[5] * Attention from MCBISTFIR */ - (rMB_CHIPLET_SPA_FIR, bit(5)) ? analyzeMCBISTFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(5)) ? analyzeMCBISTFIR; - /** MB_CHIPLET_SPA_FIR[6] + /** OCMB_CHIPLET_SPA_FIR[6] * Attention from RDFFIR */ - (rMB_CHIPLET_SPA_FIR, bit(6)) ? analyzeRDFFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(6)) ? analyzeRDFFIR; - /** MB_CHIPLET_SPA_FIR[8] + /** OCMB_CHIPLET_SPA_FIR[8] * Attention from TLXFIR */ - (rMB_CHIPLET_SPA_FIR, bit(8)) ? analyzeTLXFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(8)) ? analyzeTLXFIR; - /** MB_CHIPLET_SPA_FIR[9] + /** OCMB_CHIPLET_SPA_FIR[9] * Attention from OMIDLFIR */ - (rMB_CHIPLET_SPA_FIR, bit(9)) ? analyzeOMIDLFIR; + (rOCMB_CHIPLET_SPA_FIR, bit(9)) ? 
analyzeOMIDLFIR; }; ################################################################################ -# Explorer chip MB_LFIR +# Explorer chip OCMB_LFIR ################################################################################ -rule rMB_LFIR +rule rOCMB_LFIR { UNIT_CS: - MB_LFIR & ~MB_LFIR_MASK & ~MB_LFIR_ACT0 & ~MB_LFIR_ACT1; + OCMB_LFIR & ~OCMB_LFIR_MASK & ~OCMB_LFIR_ACT0 & ~OCMB_LFIR_ACT1; RECOVERABLE: - MB_LFIR & ~MB_LFIR_MASK & ~MB_LFIR_ACT0 & MB_LFIR_ACT1; - HOST_ATTN: - MB_LFIR & ~MB_LFIR_MASK & MB_LFIR_ACT0 & ~MB_LFIR_ACT1; + OCMB_LFIR & ~OCMB_LFIR_MASK & ~OCMB_LFIR_ACT0 & OCMB_LFIR_ACT1; }; -group gMB_LFIR +group gOCMB_LFIR filter singlebit, cs_root_cause { - /** MB_LFIR[0] + /** OCMB_LFIR[0] * CFIR access PCB error */ - (rMB_LFIR, bit(0)) ? defaultMaskedError; + (rOCMB_LFIR, bit(0)) ? self_th_32perDay; - /** MB_LFIR[1] + /** OCMB_LFIR[1] * CFIR internal parity error */ - (rMB_LFIR, bit(1)) ? defaultMaskedError; + (rOCMB_LFIR, bit(1)) ? self_th_32perDay; - /** MB_LFIR[2] + /** OCMB_LFIR[2] * LFIR internal parity error */ - (rMB_LFIR, bit(2)) ? defaultMaskedError; + (rOCMB_LFIR, bit(2)) ? self_th_32perDay; - /** MB_LFIR[3] + /** OCMB_LFIR[3] * Debug scom satellite error */ - (rMB_LFIR, bit(3)) ? defaultMaskedError; + (rOCMB_LFIR, bit(3)) ? defaultMaskedError; - /** MB_LFIR[4] + /** OCMB_LFIR[4] * PSCOM Logic: PCB Access Error */ - (rMB_LFIR, bit(4)) ? defaultMaskedError; + (rOCMB_LFIR, bit(4)) ? defaultMaskedError; - /** MB_LFIR[5] + /** OCMB_LFIR[5] * PSCOM Logic: Summarized internal errors */ - (rMB_LFIR, bit(5)) ? defaultMaskedError; + (rOCMB_LFIR, bit(5)) ? defaultMaskedError; - /** MB_LFIR[6] + /** OCMB_LFIR[6] * Trace Logic : Scom Satellite Error - Trace0 */ - (rMB_LFIR, bit(6)) ? defaultMaskedError; + (rOCMB_LFIR, bit(6)) ? defaultMaskedError; - /** MB_LFIR[7] + /** OCMB_LFIR[7] * Trace Logic : Scom Satellite Error - Trace1 */ - (rMB_LFIR, bit(7)) ? defaultMaskedError; + (rOCMB_LFIR, bit(7)) ? 
defaultMaskedError; - /** MB_LFIR[8] - * unused + /** OCMB_LFIR[8] + * PIB2GIF parity error on FSM or Registers */ - (rMB_LFIR, bit(8)) ? defaultMaskedError; + (rOCMB_LFIR, bit(8)) ? self_th_32perDay; - /** MB_LFIR[9] + /** OCMB_LFIR[9] * MSG access PCB error */ - (rMB_LFIR, bit(9)) ? defaultMaskedError; + (rOCMB_LFIR, bit(9)) ? defaultMaskedError; + + /** OCMB_LFIR[10:18] + * unused + */ + (rOCMB_LFIR, bit(10|11|12|13|14|15|16|17|18)) ? defaultMaskedError; + + /** OCMB_LFIR[19] + * DLL IRQ + */ + (rOCMB_LFIR, bit(19)) ? defaultMaskedError; + + /** OCMB_LFIR[20] + * Watchdog timer interrupt + */ + (rOCMB_LFIR, bit(20)) ? self_th_1; + + /** OCMB_LFIR[21] + * internal temp sensor tripped a threshold + */ + (rOCMB_LFIR, bit(21)) ? defaultMaskedError; + + /** OCMB_LFIR[22] + * GPBC_FATAL_ERROR + */ + (rOCMB_LFIR, bit(22)) ? self_th_1; + + /** OCMB_LFIR[23] + * GPBC_NON_FATAL_ERROR + */ + (rOCMB_LFIR, bit(23)) ? self_th_1; + + /** OCMB_LFIR[24] + * early power off warning + */ + (rOCMB_LFIR, bit(24)) ? defaultMaskedError; + + /** OCMB_LFIR[25] + * TOP fatal interrupts + */ + (rOCMB_LFIR, bit(25)) ? self_th_1; + + /** OCMB_LFIR[26] + * TOP non fatal interrupts + */ + (rOCMB_LFIR, bit(26)) ? level2_M_self_L_th_1; + + /** OCMB_LFIR[27:34] + * Interrupt from OPSe to OCMB + */ + (rOCMB_LFIR, bit(27|28|29|30|31|32|33|34)) ? defaultMaskedError; + + /** OCMB_LFIR[35] + * DDR thermal event + */ + (rOCMB_LFIR, bit(35)) ? defaultMaskedError; + + /** OCMB_LFIR[36] + * DDR4 PHY fatal + */ + (rOCMB_LFIR, bit(36)) ? self_th_1; + + /** OCMB_LFIR[37] + * DDR4 PHY non fatal + */ + (rOCMB_LFIR, bit(37)) ? self_th_32perDay; + + /** OCMB_LFIR[38] + * DDR4 PHY interrupt + */ + (rOCMB_LFIR, bit(38)) ? 
ddr4_phy_interrupt; - /** MB_LFIR[10:62] - * bits from the microsemi message register (0 to 52) + /** OCMB_LFIR[39:46] + * foxhound fatal */ - (rMB_LFIR, bit(10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61|62)) ? defaultMaskedError; + (rOCMB_LFIR, bit(39|40|41|42|43|44|45|46)) ? foxhound_fatal; + + /** OCMB_LFIR[47:54] + * foxhound non fatal + */ + (rOCMB_LFIR, bit(47|48|49|50|51|52|53|54)) ? defaultMaskedError; + + /** OCMB_LFIR[55:62] + * foxhound serdes interrupt + */ + (rOCMB_LFIR, bit(55|56|57|58|59|60|61|62)) ? defaultMaskedError; + + /** OCMB_LFIR[63] + * GIF2PCB parity error on FSM or Registers + */ + (rOCMB_LFIR, bit(63)) ? self_th_32perDay; }; @@ -557,27 +644,27 @@ group gMMIOFIR /** MMIOFIR[2] * SCOM err */ - (rMMIOFIR, bit(2)) ? defaultMaskedError; + (rMMIOFIR, bit(2)) ? self_th_32perDay; /** MMIOFIR[3] - * FSM err + * FSM perr */ - (rMMIOFIR, bit(3)) ? defaultMaskedError; + (rMMIOFIR, bit(3)) ? self_th_1; /** MMIOFIR[4] * FIFO overflow */ - (rMMIOFIR, bit(4)) ? defaultMaskedError; + (rMMIOFIR, bit(4)) ? self_th_1; /** MMIOFIR[5] * Ctl reg parity err */ - (rMMIOFIR, bit(5)) ? defaultMaskedError; + (rMMIOFIR, bit(5)) ? self_th_1; /** MMIOFIR[6] * Info reg parity error */ - (rMMIOFIR, bit(6)) ? defaultMaskedError; + (rMMIOFIR, bit(6)) ? self_th_1; /** MMIOFIR[7] * SNSC both starts err @@ -622,22 +709,22 @@ rule rSRQFIR group gSRQFIR filter singlebit, - cs_root_cause + cs_root_cause(18) { /** SRQFIR[0] * SRQ recoverable error */ - (rSRQFIR, bit(0)) ? defaultMaskedError; + (rSRQFIR, bit(0)) ? mem_port_th_1; /** SRQFIR[1] * SRQ nonrecoverable error */ - (rSRQFIR, bit(1)) ? defaultMaskedError; + (rSRQFIR, bit(1)) ? mem_port_th_1; /** SRQFIR[2] * Refresh overrun */ - (rSRQFIR, bit(2)) ? defaultMaskedError; + (rSRQFIR, bit(2)) ? 
mem_port_th_32perDay; /** SRQFIR[3] * WAT error @@ -647,12 +734,12 @@ group gSRQFIR /** SRQFIR[4] * RCD parity error */ - (rSRQFIR, bit(4)) ? defaultMaskedError; + (rSRQFIR, bit(4)) ? srq_rcd_parity_error; /** SRQFIR[5] * MCB logic error */ - (rSRQFIR, bit(5)) ? defaultMaskedError; + (rSRQFIR, bit(5)) ? mem_port_th_1; /** SRQFIR[6] * Emergency throttle @@ -662,7 +749,7 @@ group gSRQFIR /** SRQFIR[7] * NCF MCB parity error */ - (rSRQFIR, bit(7)) ? defaultMaskedError; + (rSRQFIR, bit(7)) ? mem_port_th_1; /** SRQFIR[8] * DDR MBA event n @@ -672,82 +759,82 @@ group gSRQFIR /** SRQFIR[9] * WRQ RRQ hang err */ - (rSRQFIR, bit(9)) ? defaultMaskedError; + (rSRQFIR, bit(9)) ? mem_port_th_1; /** SRQFIR[10] * SM one hot error */ - (rSRQFIR, bit(10)) ? defaultMaskedError; + (rSRQFIR, bit(10)) ? mem_port_th_1; /** SRQFIR[11] * Reg parity error */ - (rSRQFIR, bit(11)) ? defaultMaskedError; + (rSRQFIR, bit(11)) ? mem_port_th_1; /** SRQFIR[12] * Cmd parity error */ - (rSRQFIR, bit(12)) ? defaultMaskedError; + (rSRQFIR, bit(12)) ? mem_port_th_1; /** SRQFIR[13] * Port fail */ - (rSRQFIR, bit(13)) ? defaultMaskedError; + (rSRQFIR, bit(13)) ? mem_port_failure; /** SRQFIR[14] - * Spare + * informational register parity error bit */ - (rSRQFIR, bit(14)) ? defaultMaskedError; + (rSRQFIR, bit(14)) ? threshold_and_mask_mem_port; /** SRQFIR[15] * Debug parity error */ - (rSRQFIR, bit(15)) ? defaultMaskedError; + (rSRQFIR, bit(15)) ? threshold_and_mask_mem_port; /** SRQFIR[16] * WDF unrecoverable mainline error */ - (rSRQFIR, bit(16)) ? defaultMaskedError; + (rSRQFIR, bit(16)) ? mem_port_th_1; /** SRQFIR[17] * WDF mmio error */ - (rSRQFIR, bit(17)) ? defaultMaskedError; + (rSRQFIR, bit(17)) ? mem_port_th_1; /** SRQFIR[18] * WDF array UE on mainline operations (SUE put in mem) */ - (rSRQFIR, bit(18)) ? defaultMaskedError; + (rSRQFIR, bit(18)) ? mem_port_th_1_UERE; /** SRQFIR[19] * WDF mainline dataflow error (SUE not reliably put in mem) */ - (rSRQFIR, bit(19)) ? 
defaultMaskedError; + (rSRQFIR, bit(19)) ? mem_port_th_1; /** SRQFIR[20] * WDF scom register parity err, affecting mainline config */ - (rSRQFIR, bit(20)) ? defaultMaskedError; + (rSRQFIR, bit(20)) ? mem_port_th_1; /** SRQFIR[21] * WDF scom register parity err, affecting scom ops only */ - (rSRQFIR, bit(21)) ? defaultMaskedError; + (rSRQFIR, bit(21)) ? mem_port_th_1; /** SRQFIR[22] * WDF SCOM fsm parity error */ - (rSRQFIR, bit(22)) ? defaultMaskedError; + (rSRQFIR, bit(22)) ? mem_port_th_1; /** SRQFIR[23] * WDF write buffer array CE */ - (rSRQFIR, bit(23)) ? defaultMaskedError; + (rSRQFIR, bit(23)) ? mem_port_th_32perDay; /** SRQFIR[24] * NCF UE */ - (rSRQFIR, bit(24)) ? defaultMaskedError; + (rSRQFIR, bit(24)) ? mem_port_th_1; /** SRQFIR[25] * TBD @@ -757,17 +844,17 @@ group gSRQFIR /** SRQFIR[26] * NCF logic error */ - (rSRQFIR, bit(26)) ? defaultMaskedError; + (rSRQFIR, bit(26)) ? mem_port_th_1; /** SRQFIR[27] * NCF parity error */ - (rSRQFIR, bit(27)) ? defaultMaskedError; + (rSRQFIR, bit(27)) ? mem_port_th_1; /** SRQFIR[28] * NCF correctable error */ - (rSRQFIR, bit(28)) ? defaultMaskedError; + (rSRQFIR, bit(28)) ? mem_port_th_32perDay; /** SRQFIR[29] * Internal scom error @@ -807,17 +894,17 @@ group gMCBISTFIR /** MCBISTFIR[1] * Command address timeout */ - (rMCBISTFIR, bit(1)) ? defaultMaskedError; + (rMCBISTFIR, bit(1)) ? self_th_1; /** MCBISTFIR[2] * Internal FSM error */ - (rMCBISTFIR, bit(2)) ? defaultMaskedError; + (rMCBISTFIR, bit(2)) ? self_th_1; /** MCBISTFIR[3] * MCBIST broadcast out of sync */ - (rMCBISTFIR, bit(3)) ? defaultMaskedError; + (rMCBISTFIR, bit(3)) ? self_th_1; /** MCBISTFIR[4] * MCBIST data error @@ -852,7 +939,7 @@ group gMCBISTFIR /** MCBISTFIR[10] * MCBIST program complete */ - (rMCBISTFIR, bit(10)) ? defaultMaskedError; + (rMCBISTFIR, bit(10)) ? mcbist_program_complete; /** MCBISTFIR[11] * MCBIST CCS subtest done @@ -865,14 +952,14 @@ group gMCBISTFIR (rMCBISTFIR, bit(12)) ? 
defaultMaskedError; /** MCBISTFIR[13] - * SCOM recoverable reg parity error + * SCOM recoverable register parity error */ - (rMCBISTFIR, bit(13)) ? defaultMaskedError; + (rMCBISTFIR, bit(13)) ? self_th_1; /** MCBISTFIR[14] * SCOM fatal reg parity error */ - (rMCBISTFIR, bit(14)) ? defaultMaskedError; + (rMCBISTFIR, bit(14)) ? self_th_1; /** MCBISTFIR[15] * SCOM WAT and debug reg parity error @@ -917,57 +1004,57 @@ rule rRDFFIR group gRDFFIR filter singlebit, - cs_root_cause + cs_root_cause(14,15,17,35,37) { /** RDFFIR[0] * Mainline read MPE on rank 0 */ - (rRDFFIR, bit(0)) ? defaultMaskedError; + (rRDFFIR, bit(0)) ? verify_chip_mark_0; /** RDFFIR[1] * Mainline read MPE on rank 1 */ - (rRDFFIR, bit(1)) ? defaultMaskedError; + (rRDFFIR, bit(1)) ? verify_chip_mark_1; /** RDFFIR[2] * Mainline read MPE on rank 2 */ - (rRDFFIR, bit(2)) ? defaultMaskedError; + (rRDFFIR, bit(2)) ? verify_chip_mark_2; /** RDFFIR[3] - * Maineline read MPE on rank 3 + * Mainline read MPE on rank 3 */ - (rRDFFIR, bit(3)) ? defaultMaskedError; + (rRDFFIR, bit(3)) ? verify_chip_mark_3; /** RDFFIR[4] * Mainline read MPE on rank 4 */ - (rRDFFIR, bit(4)) ? defaultMaskedError; + (rRDFFIR, bit(4)) ? verify_chip_mark_4; /** RDFFIR[5] * Mainline read MPE on rank 5 */ - (rRDFFIR, bit(5)) ? defaultMaskedError; + (rRDFFIR, bit(5)) ? verify_chip_mark_5; /** RDFFIR[6] * Mainline read MPE on rank 6 */ - (rRDFFIR, bit(6)) ? defaultMaskedError; + (rRDFFIR, bit(6)) ? verify_chip_mark_6; /** RDFFIR[7] * Mainline read MPE on rank 7 */ - (rRDFFIR, bit(7)) ? defaultMaskedError; + (rRDFFIR, bit(7)) ? verify_chip_mark_7; /** RDFFIR[8] * Mainline read NCE */ - (rRDFFIR, bit(8)) ? defaultMaskedError; + (rRDFFIR, bit(8)) ? mainline_nce_tce_handling; /** RDFFIR[9] * Mainline read TCE */ - (rRDFFIR, bit(9)) ? defaultMaskedError; + (rRDFFIR, bit(9)) ? mainline_nce_tce_handling; /** RDFFIR[10] * Mainline read SCE @@ -987,27 +1074,27 @@ group gRDFFIR /** RDFFIR[13] * Mainline read AUE */ - (rRDFFIR, bit(13)) ? 
defaultMaskedError; + (rRDFFIR, bit(13)) ? mainline_aue_iaue_handling; /** RDFFIR[14] * Mainline read UE */ - (rRDFFIR, bit(14)) ? defaultMaskedError; + (rRDFFIR, bit(14)) ? mainline_ue_handling_UERE; /** RDFFIR[15] * Mainline read RCD */ - (rRDFFIR, bit(15)) ? defaultMaskedError; + (rRDFFIR, bit(15)) ? rdf_rcd_parity_error_UERE; /** RDFFIR[16] * Mainline read IAUE */ - (rRDFFIR, bit(16)) ? defaultMaskedError; + (rRDFFIR, bit(16)) ? mainline_aue_iaue_handling; /** RDFFIR[17] * Mainline read IUE */ - (rRDFFIR, bit(17)) ? defaultMaskedError; + (rRDFFIR, bit(17)) ? mainline_iue_handling; /** RDFFIR[18] * Mainline read IRCD @@ -1017,7 +1104,7 @@ group gRDFFIR /** RDFFIR[19] * Mainline read IMPE */ - (rRDFFIR, bit(19)) ? defaultMaskedError; + (rRDFFIR, bit(19)) ? memory_impe_handling; /** RDFFIR[20:27] * Maintenance MPE @@ -1052,7 +1139,7 @@ group gRDFFIR /** RDFFIR[33] * Maintenance AUE */ - (rRDFFIR, bit(33)) ? defaultMaskedError; + (rRDFFIR, bit(33)) ? maintenance_aue_handling; /** RDFFIR[34] * Maintenance UE @@ -1062,72 +1149,72 @@ group gRDFFIR /** RDFFIR[35] * Maintenance RCD */ - (rRDFFIR, bit(35)) ? defaultMaskedError; + (rRDFFIR, bit(35)) ? rdf_rcd_parity_error_UERE; /** RDFFIR[36] * Maintenance IAUE */ - (rRDFFIR, bit(36)) ? defaultMaskedError; + (rRDFFIR, bit(36)) ? maintenance_iaue_handling; /** RDFFIR[37] * Maintenance IUE */ - (rRDFFIR, bit(37)) ? defaultMaskedError; + (rRDFFIR, bit(37)) ? maintenance_iue_handling; /** RDFFIR[38] - * Maintenance IRCD + * Maintenance IRCD */ (rRDFFIR, bit(38)) ? defaultMaskedError; /** RDFFIR[39] * Maintenance IMPE */ - (rRDFFIR, bit(39)) ? defaultMaskedError; + (rRDFFIR, bit(39)) ? memory_impe_handling; /** RDFFIR[40] * RDDATA valid error */ - (rRDFFIR, bit(40)) ? defaultMaskedError; + (rRDFFIR, bit(40)) ? mem_port_th_32perDay; /** RDFFIR[41] * SCOM status register parity error */ - (rRDFFIR, bit(41)) ? defaultMaskedError; + (rRDFFIR, bit(41)) ? 
threshold_and_mask_mem_port; /** RDFFIR[42] * SCOM recoverable register parity error */ - (rRDFFIR, bit(42)) ? defaultMaskedError; + (rRDFFIR, bit(42)) ? mem_port_th_1; /** RDFFIR[43] * SCOM unrecoverable register parity error */ - (rRDFFIR, bit(43)) ? defaultMaskedError; + (rRDFFIR, bit(43)) ? mem_port_th_1; /** RDFFIR[44] * ECC corrector internal parity error */ - (rRDFFIR, bit(44)) ? defaultMaskedError; + (rRDFFIR, bit(44)) ? mem_port_th_1; /** RDFFIR[45] * Rd Buff ECC CHK Cor CE DW0 Detected */ - (rRDFFIR, bit(45)) ? defaultMaskedError; + (rRDFFIR, bit(45)) ? mem_port_th_32perDay; /** RDFFIR[46] * Rd Buff ECC CHK Cor CE DW1 Detected */ - (rRDFFIR, bit(46)) ? defaultMaskedError; + (rRDFFIR, bit(46)) ? mem_port_th_32perDay; /** RDFFIR[47] * Rd Buff ECC CHK Cor UE DW0 Detected */ - (rRDFFIR, bit(47)) ? defaultMaskedError; + (rRDFFIR, bit(47)) ? mem_port_th_1; /** RDFFIR[48] * Rd Buff ECC CHK Cor UE DW1 Detected */ - (rRDFFIR, bit(48)) ? defaultMaskedError; + (rRDFFIR, bit(48)) ? mem_port_th_1; /** RDFFIR[49:59] * Reserved @@ -1177,67 +1264,67 @@ group gTLXFIR /** TLXFIR[0] * Info reg parity error */ - (rTLXFIR, bit(0)) ? defaultMaskedError; + (rTLXFIR, bit(0)) ? threshold_and_mask_self; /** TLXFIR[1] * Ctrl reg parity error */ - (rTLXFIR, bit(1)) ? defaultMaskedError; + (rTLXFIR, bit(1)) ? self_th_1; /** TLXFIR[2] * TLX VC0 return credit counter overflow */ - (rTLXFIR, bit(2)) ? defaultMaskedError; + (rTLXFIR, bit(2)) ? omi_bus_th_1; /** TLXFIR[3] * TLX VC1 return credit counter overflow */ - (rTLXFIR, bit(3)) ? defaultMaskedError; + (rTLXFIR, bit(3)) ? omi_bus_th_1; /** TLXFIR[4] * TLX dcp0 return credit counter overflow */ - (rTLXFIR, bit(4)) ? defaultMaskedError; + (rTLXFIR, bit(4)) ? omi_bus_th_1; /** TLXFIR[5] * TLX dcp1 return credit counter overflow */ - (rTLXFIR, bit(5)) ? defaultMaskedError; + (rTLXFIR, bit(5)) ? omi_bus_th_1; /** TLXFIR[6] * TLX credit management block error */ - (rTLXFIR, bit(6)) ? defaultMaskedError; + (rTLXFIR, bit(6)) ? 
self_th_1; /** TLXFIR[7] * TLX credit management block parity error */ - (rTLXFIR, bit(7)) ? defaultMaskedError; + (rTLXFIR, bit(7)) ? self_th_1; /** TLXFIR[8] * TLXT fatal parity error */ - (rTLXFIR, bit(8)) ? defaultMaskedError; + (rTLXFIR, bit(8)) ? self_th_1; /** TLXFIR[9] * TLXT recoverable error */ - (rTLXFIR, bit(9)) ? defaultMaskedError; + (rTLXFIR, bit(9)) ? analyzeTLXERR1; /** TLXFIR[10] * TLXT configuration error */ - (rTLXFIR, bit(10)) ? defaultMaskedError; + (rTLXFIR, bit(10)) ? level2_M_self_L_th_1; /** TLXFIR[11] * TLXT informational parity error */ - (rTLXFIR, bit(11)) ? defaultMaskedError; + (rTLXFIR, bit(11)) ? self_th_1; /** TLXFIR[12] * TLXT hard error */ - (rTLXFIR, bit(12)) ? defaultMaskedError; + (rTLXFIR, bit(12)) ? self_th_1; /** TLXFIR[13:15] * Reserved @@ -1257,47 +1344,47 @@ group gTLXFIR /** TLXFIR[18] * OC malformed */ - (rTLXFIR, bit(18)) ? defaultMaskedError; + (rTLXFIR, bit(18)) ? omi_bus_th_1; /** TLXFIR[19] * OC protocol error */ - (rTLXFIR, bit(19)) ? defaultMaskedError; + (rTLXFIR, bit(19)) ? omi_th_1; /** TLXFIR[20] * Address translate error */ - (rTLXFIR, bit(20)) ? defaultMaskedError; + (rTLXFIR, bit(20)) ? self_th_1; /** TLXFIR[21] * Metadata unc or data parity error */ - (rTLXFIR, bit(21)) ? defaultMaskedError; + (rTLXFIR, bit(21)) ? self_th_1; /** TLXFIR[22] * OC unsupported group 2 */ - (rTLXFIR, bit(22)) ? defaultMaskedError; + (rTLXFIR, bit(22)) ? omi_bus_th_1; /** TLXFIR[23] * OC unsupported group 1 */ - (rTLXFIR, bit(23)) ? defaultMaskedError; + (rTLXFIR, bit(23)) ? omi_bus_th_1; /** TLXFIR[24] * Bit flip control error */ - (rTLXFIR, bit(24)) ? defaultMaskedError; + (rTLXFIR, bit(24)) ? self_th_1; /** TLXFIR[25] * Control HW error */ - (rTLXFIR, bit(25)) ? defaultMaskedError; + (rTLXFIR, bit(25)) ? self_th_1; /** TLXFIR[26] * ECC corrected and others */ - (rTLXFIR, bit(26)) ? defaultMaskedError; + (rTLXFIR, bit(26)) ? 
self_th_32perDay; /** TLXFIR[27] * Trace stop @@ -1316,6 +1403,37 @@ group gTLXFIR }; +rule rTLX_ERR1_REPORT +{ + RECOVERABLE: + TLX_ERR1_REPORT & ~TLX_ERR1_REPORT_MASK; +}; + +group gTLX_ERR1_REPORT + filter singlebit, + cs_root_cause +{ + /** TLX_ERR1_REPORT[37] + * TLXT FIFO CE + */ + (rTLX_ERR1_REPORT, bit(37)) ? self_th_32perDay; + + /** TLX_ERR1_REPORT[39] + * Unexpected Interrupt Response + */ + (rTLX_ERR1_REPORT, bit(39)) ? parent_proc_th_32perDay; + + /** TLX_ERR1_REPORT[40] + * BDI Poisoned + */ + (rTLX_ERR1_REPORT, bit(40)) ? self_th_1; + + /** TLX_ERR1_REPORT[41] + * TLXT Metadata UE + */ + (rTLX_ERR1_REPORT, bit(41)) ? self_th_1; +}; + ################################################################################ # Explorer chip OMIDLFIR ################################################################################ @@ -1335,112 +1453,112 @@ group gOMIDLFIR cs_root_cause { /** OMIDLFIR[0] - * DL0 fatal error + * OMI-DL0 fatal error */ - (rOMIDLFIR, bit(0)) ? defaultMaskedError; + (rOMIDLFIR, bit(0)) ? dl_fatal_error; /** OMIDLFIR[1] - * Dl0 data UE + * OMI-DL0 UE on data flit */ - (rOMIDLFIR, bit(1)) ? defaultMaskedError; + (rOMIDLFIR, bit(1)) ? self_th_1; /** OMIDLFIR[2] - * Dl0 flit CE + * OMI-DL0 CE on TL flit */ - (rOMIDLFIR, bit(2)) ? defaultMaskedError; + (rOMIDLFIR, bit(2)) ? self_th_32perDay; /** OMIDLFIR[3] - * Dl0 CRC error + * OMI-DL0 detected a CRC error */ (rOMIDLFIR, bit(3)) ? defaultMaskedError; /** OMIDLFIR[4] - * DL0 nack + * OMI-DL0 received a nack */ (rOMIDLFIR, bit(4)) ? defaultMaskedError; /** OMIDLFIR[5] - * DL0 X4 mode + * OMI-DL0 running in degraded mode */ - (rOMIDLFIR, bit(5)) ? defaultMaskedError; + (rOMIDLFIR, bit(5)) ? omi_bus_th_1; /** OMIDLFIR[6] - * DL0 EDPL + * OMI-DL0 parity error detection on a lane */ (rOMIDLFIR, bit(6)) ? defaultMaskedError; /** OMIDLFIR[7] - * DL0 timeout + * OMI-DL0 retrained due to no forward progress */ - (rOMIDLFIR, bit(7)) ? defaultMaskedError; + (rOMIDLFIR, bit(7)) ? 
omi_bus_th_32perDay; /** OMIDLFIR[8] - * DL0 remote retrain + * OMI-DL0 remote side initiated a retrain */ (rOMIDLFIR, bit(8)) ? defaultMaskedError; /** OMIDLFIR[9] - * DL0 error retrain + * OMI-DL0 retrain due to internal error or software initiated */ - (rOMIDLFIR, bit(9)) ? defaultMaskedError; + (rOMIDLFIR, bit(9)) ? omi_bus_th_32perDay; /** OMIDLFIR[10] - * DL0 EDPL retrain + * OMI-DL0 threshold reached */ - (rOMIDLFIR, bit(10)) ? defaultMaskedError; + (rOMIDLFIR, bit(10)) ? omi_bus_th_32perDay; /** OMIDLFIR[11] - * DL0 trained + * OMI-DL0 trained */ (rOMIDLFIR, bit(11)) ? defaultMaskedError; /** OMIDLFIR[12] - * DL0 endpoint bit 0 + * OMI-DL0 endpoint error bit 0 */ (rOMIDLFIR, bit(12)) ? defaultMaskedError; /** OMIDLFIR[13] - * DL0 endpoint bit 1 + * OMI-DL0 endpoint error bit 1 */ (rOMIDLFIR, bit(13)) ? defaultMaskedError; /** OMIDLFIR[14] - * DL0 endpoint bit 2 + * OMI-DL0 endpoint error bit 2 */ (rOMIDLFIR, bit(14)) ? defaultMaskedError; /** OMIDLFIR[15] - * DL0 endpoint bit 3 + * OMI-DL0 endpoint error bit 3 */ (rOMIDLFIR, bit(15)) ? defaultMaskedError; /** OMIDLFIR[16] - * DL0 endpoint bit 4 + * OMI-DL0 endpoint error bit 4 */ (rOMIDLFIR, bit(16)) ? defaultMaskedError; /** OMIDLFIR[17] - * DL0 endpoint bit 5 + * OMI-DL0 endpoint error bit 5 */ (rOMIDLFIR, bit(17)) ? defaultMaskedError; /** OMIDLFIR[18] - * DL0 endpoint bit 6 + * OMI-DL0 endpoint error bit 6 */ (rOMIDLFIR, bit(18)) ? defaultMaskedError; /** OMIDLFIR[19] - * DL0 endpoint bit 7 + * OMI-DL0 endpoint error bit 7 */ (rOMIDLFIR, bit(19)) ? defaultMaskedError; /** OMIDLFIR[20:39] - * DL1 reserved + * OMI-DL1 reserved */ (rOMIDLFIR, bit(20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39)) ? defaultMaskedError; /** OMIDLFIR[40:59] - * DL2 reserved + * OMI-DL2 reserved */ (rOMIDLFIR, bit(40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)) ? defaultMaskedError; @@ -1449,6 +1567,21 @@ group gOMIDLFIR */ (rOMIDLFIR, bit(60)) ? 
defaultMaskedError; + /** OMIDLFIR[61] + * reserved + */ + (rOMIDLFIR, bit(61)) ? defaultMaskedError; + + /** OMIDLFIR[62] + * LFIR internal parity error + */ + (rOMIDLFIR, bit(62)) ? defaultMaskedError; + + /** OMIDLFIR[63] + * SCOM Satellite Error + */ + (rOMIDLFIR, bit(63)) ? defaultMaskedError; + }; ############################################################################## @@ -1463,6 +1596,5 @@ group gOMIDLFIR ############################################################################## # Include the actions defined for this target -.include "p9_common_actions.rule"; .include "explorer_ocmb_actions.rule"; diff --git a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule index 023821b0d..d5b6e3fad 100644 --- a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule +++ b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -55,22 +55,12 @@ actionclass threshold32pday threshold( field(32 / day) ); }; -################################################################################ -# Threshold and Mask policy -################################################################################ - -/** - * Threshold 32/day (field) and 1 (mnfg). Do not predictively callout on - * threshold in the field, instead just mask. 
- */ -actionclass threshold_and_mask +/** Threshold of 5 per day */ +actionclass threshold5pday { - threshold32pday; - funccall("ClearServiceCallFlag"); + threshold( field(5 / day) ); }; -actionclass threshold_and_mask_self { calloutSelfMed; threshold_and_mask; }; - ################################################################################ # Special Flags # ################################################################################ @@ -99,6 +89,50 @@ actionclass callout2ndLvlMed actionclass calloutSelfLowNoGard { callout(MRU_LOW, NO_GARD); }; +actionclass level2_M_self_L +{ + callout2ndLvlMed; + calloutSelfLow; +}; + +actionclass omi +{ + callout(connected(TYPE_OMI), MRU_MED); +}; + +actionclass omi_bus +{ + calloutSelfMedA; + callout(connected(TYPE_OMI), MRU_MEDA); + funccall("calloutBusInterfacePlugin"); +}; + +actionclass mem_port +{ + callout(connected(TYPE_MEM_PORT,0), MRU_MED); +}; + +actionclass mem_port_L +{ + callout(connected(TYPE_MEM_PORT,0), MRU_LOW); +}; + +actionclass all_dimm_H +{ + funccall("CalloutAttachedDimmsHigh"); +}; + +actionclass all_dimm_H_memport_L +{ + all_dimm_H; + mem_port_L; +}; + +actionclass parent_proc +{ + callout(connected(TYPE_PROC), MRU_MED); +}; + ################################################################################ # Callouts with thresholds # ################################################################################ @@ -109,15 +143,15 @@ actionclass self_th_1 threshold1; }; -actionclass self_th_5perHour +actionclass self_th_32perDay { calloutSelfMed; - threshold5phour; + threshold32pday; }; -actionclass self_th_32perDay +actionclass parent_proc_th_32perDay { - calloutSelfMed; + parent_proc; threshold32pday; }; @@ -127,12 +161,83 @@ actionclass level2_th_1 threshold1; }; +actionclass level2_th_32perDay +{ + callout2ndLvlMed; + threshold32pday; +}; + +actionclass level2_M_self_L_th_1 +{ + level2_M_self_L; + threshold1; +}; + +actionclass omi_th_1 +{ + omi; + threshold1; +}; + +actionclass 
omi_bus_th_1 +{ + omi_bus; + threshold1; +}; + +actionclass omi_bus_th_32perDay +{ + omi_bus; + threshold32pday; +}; + +actionclass mem_port_th_1 +{ + mem_port; + threshold1; +}; + +actionclass mem_port_th_32perDay +{ + mem_port; + threshold32pday; +}; + +################################################################################ +# Special # +################################################################################ + +/** + * Threshold 32/day (field) and 1 (mnfg). Do not predictively callout on + * threshold in the field, instead just mask. + */ +actionclass threshold_and_mask +{ + threshold32pday; + funccall("ClearServiceCallFlag"); +}; + +actionclass threshold_and_mask_self { calloutSelfMed; threshold_and_mask; }; + +actionclass threshold_and_mask_level2 +{ + level2_th_32perDay; + threshold_and_mask; +}; + +actionclass threshold_and_mask_mem_port +{ + mem_port_th_32perDay; + threshold_and_mask; +}; + ################################################################################ # Callouts with flags # ################################################################################ -actionclass self_th_1_UERE { self_th_1; SueSource; }; -actionclass level2_th_1_UERE { level2_th_1; SueSource; }; +actionclass self_th_1_UERE { self_th_1; SueSource; }; +actionclass level2_th_1_UERE { level2_th_1; SueSource; }; +actionclass mem_port_th_1_UERE { mem_port_th_1; SueSource; }; ################################################################################ # Default callouts # @@ -153,14 +258,166 @@ actionclass TBDDefaultCallout }; ################################################################################ +# OCMB Actions # +################################################################################ + +/** DDR4 PHY Interrupt */ +actionclass ddr4_phy_interrupt +{ + calloutSelfHigh; + threshold5pday; + funccall("Ddr4PhyInterrupt"); +}; + +/** Foxhound Fatal */ +actionclass foxhound_fatal +{ + funccall("FoxhoundFatal"); + threshold1; +}; + +/** OMI-DL 
Fatal Error */ +actionclass dl_fatal_error +{ + try( funccall("DlFatalError"), omi_bus ); + threshold1; +}; + +/** MCBIST program complete */ +actionclass mcbist_program_complete +{ + funccall("McbistCmdComplete"); +}; + +/** Verify Chip Mark */ +actionclass verify_chip_mark_0 { funccall("AnalyzeFetchMpe_0"); }; +actionclass verify_chip_mark_1 { funccall("AnalyzeFetchMpe_1"); }; +actionclass verify_chip_mark_2 { funccall("AnalyzeFetchMpe_2"); }; +actionclass verify_chip_mark_3 { funccall("AnalyzeFetchMpe_3"); }; +actionclass verify_chip_mark_4 { funccall("AnalyzeFetchMpe_4"); }; +actionclass verify_chip_mark_5 { funccall("AnalyzeFetchMpe_5"); }; +actionclass verify_chip_mark_6 { funccall("AnalyzeFetchMpe_6"); }; +actionclass verify_chip_mark_7 { funccall("AnalyzeFetchMpe_7"); }; + +/** Mainline NCE/TCE handling */ +actionclass mainline_nce_tce_handling +{ + funccall("AnalyzeFetchNceTce"); +}; + +/** Handle Mainline AUEs/IAUEs */ +actionclass mainline_aue_iaue_handling +{ + funccall("AnalyzeFetchAueIaue"); + mem_port_L; + threshold1; +}; + +/** Mainline UE handling */ +actionclass mainline_ue_handling +{ + threshold( field(33 / 30 min ) ); # To prevent flooding. Will be unmasked + # when background scrubbing resumes after + # targeted diagnostics is complete. + funccall("AnalyzeFetchUe"); +}; + +actionclass mainline_ue_handling_UERE +{ + SueSource; + mainline_ue_handling; +}; + +/** Handle Mainline IUEs */ +actionclass mainline_iue_handling +{ + # An IUE itself is not a SUE source, however, a threshold of IUEs will + # trigger a port failure, which will generate SUEs. The port failure could + # also crash the machine so we want to make sure this bit is flagged as an + # SUE just in case it is needed in the checkstop analysis. 
+ SueSource; + # Thresholding done in the plugin + funccall("AnalyzeMainlineIue"); +}; + +/** Handle Maintenance IUEs */ +actionclass maintenance_iue_handling +{ + # An IUE itself is not a SUE source, however, a threshold of IUEs will + # trigger a port failure, which will generate SUEs. The port failure could + # also crash the machine so we want to make sure this bit is flagged as an + # SUE just in case it is needed in the checkstop analysis. + SueSource; + # Thresholding done in the plugin + funccall("AnalyzeMaintIue"); +}; + +actionclass memory_impe_handling +{ + funccall("AnalyzeImpe"); +}; + +/** Handle Maintenance AUEs */ +actionclass maintenance_aue_handling +{ + funccall("AnalyzeMaintAue"); + mem_port_L; + threshold1; +}; + +/** Handle Maintenance IAUEs */ +actionclass maintenance_iaue_handling +{ + all_dimm_H_memport_L; + threshold1; +}; + +/** RDF RCD Parity Error */ +actionclass rdf_rcd_parity_error +{ + funccall("RdfRcdParityError"); + threshold1; +}; + +actionclass rdf_rcd_parity_error_UERE +{ + rdf_rcd_parity_error; + SueSource; +}; + +/** SRQ RCD Parity Error */ +actionclass srq_rcd_parity_error +{ + all_dimm_H_memport_L; + threshold32pday; +}; + +actionclass srq_rcd_parity_error_UERE +{ + srq_rcd_parity_error; + SueSource; +}; + +actionclass mem_port_failure +{ + all_dimm_H_memport_L; + threshold1; # Threshold 1 +}; + +################################################################################ # Analyze groups ################################################################################ -actionclass analyzeMB_LFIR { analyze(gMB_LFIR); }; +actionclass analyzeOCMB_LFIR { analyze(gOCMB_LFIR); }; actionclass analyzeMMIOFIR { analyze(gMMIOFIR); }; actionclass analyzeSRQFIR { analyze(gSRQFIR); }; actionclass analyzeMCBISTFIR { analyze(gMCBISTFIR); }; actionclass analyzeRDFFIR { analyze(gRDFFIR); }; actionclass analyzeTLXFIR { analyze(gTLXFIR); }; +actionclass analyzeTLXERR1 +{ + analyze(gTLX_ERR1_REPORT); + funccall("clearAndMaskTlxtRe"); +}; 
actionclass analyzeOMIDLFIR { analyze(gOMIDLFIR); }; diff --git a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule index a4a526124..c2205f2dd 100644 --- a/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule +++ b/src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule @@ -223,3 +223,259 @@ capture group never; access write_only; }; + + ############################################################################ + # P9 Hardware Mark Stores + ############################################################################ + + register HW_MS0 + { + name "P9 Hardware Mark Store rank 0"; + scomaddr 0x08011C10; + capture group default; + }; + + register HW_MS1 + { + name "P9 Hardware Mark Store rank 1"; + scomaddr 0x08011C11; + capture group default; + }; + + register HW_MS2 + { + name "P9 Hardware Mark Store rank 2"; + scomaddr 0x08011C12; + capture group default; + }; + + register HW_MS3 + { + name "P9 Hardware Mark Store rank 3"; + scomaddr 0x08011C13; + capture group default; + }; + + register HW_MS4 + { + name "P9 Hardware Mark Store rank 4"; + scomaddr 0x08011C14; + capture group default; + }; + + register HW_MS5 + { + name "P9 Hardware Mark Store rank 5"; + scomaddr 0x08011C15; + capture group default; + }; + + register HW_MS6 + { + name "P9 Hardware Mark Store rank 6"; + scomaddr 0x08011C16; + capture group default; + }; + + register HW_MS7 + { + name "P9 Hardware Mark Store rank 7"; + scomaddr 0x08011C17; + capture group default; + }; + + ############################################################################ + # P9 Firmware Mark Stores + ############################################################################ + + register FW_MS0 + { + name "P9 Firmware Mark Store 0"; + scomaddr 0x08011C18; + capture group default; + }; + + register FW_MS1 + { + name "P9 Firmware Mark Store 1"; + scomaddr 0x08011C19; + capture group default; + }; + + register FW_MS2 + { + name 
"P9 Firmware Mark Store 2"; + scomaddr 0x08011C1A; + capture group default; + }; + + register FW_MS3 + { + name "P9 Firmware Mark Store 3"; + scomaddr 0x08011C1B; + capture group default; + }; + + register FW_MS4 + { + name "P9 Firmware Mark Store 4"; + scomaddr 0x08011C1C; + capture group default; + }; + + register FW_MS5 + { + name "P9 Firmware Mark Store 5"; + scomaddr 0x08011C1D; + capture group default; + }; + + register FW_MS6 + { + name "P9 Firmware Mark Store 6"; + scomaddr 0x08011C1E; + capture group default; + }; + + register FW_MS7 + { + name "P9 Firmware Mark Store 7"; + scomaddr 0x08011C1F; + capture group default; + }; + + ########################################################################### + # P9 OCMB target OMIDLFIR + ########################################################################### + + register DL0_ERROR_HOLD + { + name "P9 OCMB target DL0 Error Hold Register"; + scomaddr 0x08012813; + capture group default; + }; + + ########################################################################### + # P9 OCMB target TLXFIR + ########################################################################### + + register TLXFIR_AND + { + name "Explorer chip TLXFIR AND"; + scomaddr 0x08012401; + capture group never; + access write_only; + }; + + register TLXFIR_MASK_OR + { + name "Explorer chip TLXFIR MASK OR"; + scomaddr 0x08012405; + capture group never; + access write_only; + }; + + register TLX_ERR1_REPORT + { + name "P9 OCMB target TLX Error Report Register"; + scomaddr 0x0801241D; + reset (&, 0x0801241D); + mask (|, 0x08012415); + capture group default; + }; + + register TLX_ERR1_REPORT_MASK + { + name "P9 OCMB target TLX Error Report Register Mask"; + scomaddr 0x08012415; + capture group default; + }; + + ############################################################################ + # Explorer ECC Address Registers + ############################################################################ + + register MBNCER + { + name "Explorer 
Mainline NCE Address Trap Register"; + scomaddr 0x0801186A; + capture group default; + }; + + register MBRCER + { + name "Explorer Mainline RCE Address Trap Register"; + scomaddr 0x0801186B; + capture group default; + }; + + register MBMPER + { + name "Explorer Mainline MPE Address Trap Register"; + scomaddr 0x0801186C; + capture group default; + }; + + register MBUER + { + name "Explorer Mainline UE Address Trap Register"; + scomaddr 0x0801186D; + capture group default; + }; + + register MBAUER + { + name "Explorer Mainline AUE Address Trap Register"; + scomaddr 0x0801186E; + capture group default; + }; + + ############################################################################ + # Misc + ############################################################################ + + register FARB0 + { + name "MB_SIM.SRQ.MBA_FARB0Q"; + scomaddr 0x08011415; + capture group default; + }; + + register EXP_MSR + { + name "Explorer Mark Shadow Register"; + scomaddr 0x08011C0C; + capture group default; + }; + + register MC_ADDR_TRANS + { + name "P9 OCMB target address translation register0"; + scomaddr 0x0801186F; + capture group default; + }; + + register MC_ADDR_TRANS1 + { + name "P9 OCMB target address translation register1"; + scomaddr 0x08011870; + capture group default; + }; + + register MC_ADDR_TRANS2 + { + name "P9 OCMB target address translation register2"; + scomaddr 0x08011871; + capture group default; + }; + + ############################################################################ + # Interrupt status register + ############################################################################ + + register INTER_STATUS_REG + { + name "TPTOP.PIB.PCBMS.INTERRUPT_TYPE_REG"; + scomaddr 0x000F001A; + capture group default; + }; diff --git a/src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C b/src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C new file mode 100644 index 000000000..de385aab9 --- /dev/null +++ 
b/src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C @@ -0,0 +1,574 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfMemDbUtils.H> +#include <prdfMemEccAnalysis.H> +#include <prdfMemUtils.H> +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace explorer_ocmb +{ + +//############################################################################## +// +// Special plugins +// +//############################################################################## + +/** + * @brief Plugin that initializes the data bundle. + * @param i_chip An OCMB chip. 
+ * @return SUCCESS + */ +int32_t Initialize( ExtensibleChip * i_chip ) +{ + i_chip->getDataBundle() = new OcmbDataBundle( i_chip ); + return SUCCESS; +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, Initialize ); + +/** + * @brief Plugin function called after analysis is complete but before PRD + * exits. + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. + * @note This is especially useful for any analysis that still needs to be + * done after the framework clears the FIR bits that were at attention. + * @return SUCCESS. + */ +int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::PostAnalysis] " + + #ifdef __HOSTBOOT_RUNTIME + + // If the IUE threshold in our data bundle has been reached, we trigger + // a port fail. Once we trigger the port fail, the system may crash + // right away. Since PRD is running in the hypervisor, it is possible we + // may not get the error log. To better our chances, we trigger the port + // fail here after the error log has been committed. + if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) ) + { + if ( SUCCESS != MemEcc::triggerPortFail<TYPE_OCMB_CHIP>(i_chip) ) + { + PRDF_ERR( PRDF_FUNC "triggerPortFail(0x%08x) failed", + i_chip->getHuid() ); + } + } + + #endif // __HOSTBOOT_RUNTIME + + // Cleanup processor FIR bits on the other side of the channel. + MemUtils::cleanupChnlAttns<TYPE_OCMB_CHIP>( i_chip, io_sc ); + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, PostAnalysis ); + + +//############################################################################## +// +// OCMB_LFIR +// +//############################################################################## + +/** + * @brief OCMB_LFIR[38] - DDR4 PHY interrupt + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +int32_t Ddr4PhyInterrupt( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::Ddr4PhyInterrupt] " + + SCAN_COMM_REGISTER_CLASS * rdffir = i_chip->getRegister( "RDFFIR" ); + + // If Mainline UE (RDFFIR[14]) or Maint UE (RDFFIR[34]) are on at the same + // time as this: + if ( rdffir->IsBitSet(14) || rdffir->IsBitSet(34) ) + { + // callout Explorer on 1st + io_sc.service_data->SetThresholdMaskId(0); + + // mask maint and mainline UE which are assumed to be side-effects + SCAN_COMM_REGISTER_CLASS * rdffir_mask_or = + i_chip->getRegister( "RDFFIR_MASK_OR" ); + + rdffir_mask_or->SetBit(14); + rdffir_mask_or->SetBit(34); + + if ( SUCCESS != rdffir_mask_or->Write() ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_MASK_OR: 0x%08x", + i_chip->getHuid() ); + } + } + else + { + //TODO RTC 200583 + // callout Explorer on threshold (5/day) + // NOTE: in this case we will have to clear both hw driven checkers + // manually before clearing the FIR + } + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, Ddr4PhyInterrupt ); + +//------------------------------------------------------------------------------ + +/** + * @brief OCMB_LFIR[39:46] - Foxhound Fatal + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t FoxhoundFatal( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::FoxhoundFatal] " + + //TODO RTC 200583 + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, FoxhoundFatal ); + +//############################################################################## +// +// OMIDLFIR +// +//############################################################################## + +/** + * @brief OMIDLFIR[0] - OMI-DL0 Fatal Error + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return PRD_SCAN_COMM_REGISTER_ZERO for the bus callout, else SUCCESS + */ +int32_t DlFatalError( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::DlFatalError] " + + int32_t rc = SUCCESS; + + do + { + // Check DL0_ERROR_HOLD[52:63] to determine callout + SCAN_COMM_REGISTER_CLASS * dl0_error_hold = + i_chip->getRegister( "DL0_ERROR_HOLD" ); + + if ( SUCCESS != dl0_error_hold->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() Failed on DL0_ERROR_HOLD: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + + if ( dl0_error_hold->IsBitSet(53) || + dl0_error_hold->IsBitSet(55) || + dl0_error_hold->IsBitSet(57) || + dl0_error_hold->IsBitSet(58) || + dl0_error_hold->IsBitSet(59) || + dl0_error_hold->IsBitSet(60) || + dl0_error_hold->IsBitSet(62) || + dl0_error_hold->IsBitSet(63) ) + { + // callout OCMB + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + } + else if ( dl0_error_hold->IsBitSet(54) || + dl0_error_hold->IsBitSet(56) || + dl0_error_hold->IsBitSet(61) ) + { + // callout the OMI target, the OMI bus, and the OCMB. + // Return PRD_SCAN_COMM_REGISTER_ZERO so the rule code knows to + // make the correct callout. + rc = PRD_SCAN_COMM_REGISTER_ZERO; + } + + }while(0); + + return rc; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, DlFatalError ); + +//############################################################################## +// +// RDFFIR +// +//############################################################################## + +/** + * @brief Adds all attached DIMMs at HIGH priority. + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +int32_t CalloutAttachedDimmsHigh( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + for ( auto & dimm : getConnected(i_chip->getTrgt(), TYPE_DIMM) ) + io_sc.service_data->SetCallout( dimm, MRU_HIGH ); + + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, CalloutAttachedDimmsHigh ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDF RCD Parity Error + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t RdfRcdParityError( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::RdfRcdParityError] " + + do + { + SCAN_COMM_REGISTER_CLASS * rdffir = i_chip->getRegister( "RDFFIR" ); + if ( SUCCESS != rdffir->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() Failed on RDFFIR: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + + // If RDFFIR[40] on at the same time, this is 'missing rddata valid' + // case, which returns SUE + if ( rdffir->IsBitSet(40) ) + { + // callout MEM_PORT on 1st occurrence + TargetHandle_t memPort = + getConnectedChild( i_chip->getTrgt(), TYPE_MEM_PORT, 0 ); + io_sc.service_data->SetCallout( memPort ); + } + // Else this is 'confirmed RCD parity error' case + else + { + // callout DIMM high priority, MEM_PORT low on 1st occurrence + CalloutAttachedDimmsHigh( i_chip, io_sc ); + TargetHandle_t memPort = + getConnectedChild( i_chip->getTrgt(), TYPE_MEM_PORT, 0 ); + io_sc.service_data->SetCallout( memPort, MRU_LOW ); + } + + // Mask bit 40 as well + SCAN_COMM_REGISTER_CLASS * rdffir_mask_or = + i_chip->getRegister( "RDFFIR_MASK_OR" ); + + rdffir_mask_or->SetBit(40); + if ( SUCCESS != rdffir_mask_or->Write() ) + { + PRDF_ERR( PRDF_FUNC "Write() Failed on RDFFIR_MASK_OR: " + "i_chip=0x%08x", i_chip->getHuid() ); + break; + } + + }while(0); + + return SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, 
RdfRcdParityError ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[0:7] - Mainline MPE. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +#define PLUGIN_FETCH_MPE_ERROR( RANK ) \ +int32_t AnalyzeFetchMpe_##RANK( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + MemRank rank ( RANK ); \ + MemEcc::analyzeFetchMpe<TYPE_OCMB_CHIP>( i_chip, rank, io_sc ); \ + return SUCCESS; \ +} \ +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchMpe_##RANK ); + +PLUGIN_FETCH_MPE_ERROR( 0 ) +PLUGIN_FETCH_MPE_ERROR( 1 ) +PLUGIN_FETCH_MPE_ERROR( 2 ) +PLUGIN_FETCH_MPE_ERROR( 3 ) +PLUGIN_FETCH_MPE_ERROR( 4 ) +PLUGIN_FETCH_MPE_ERROR( 5 ) +PLUGIN_FETCH_MPE_ERROR( 6 ) +PLUGIN_FETCH_MPE_ERROR( 7 ) + +#undef PLUGIN_FETCH_MPE_ERROR + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[8:9] - Mainline NCE and/or TCE. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeFetchNceTce( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + MemEcc::analyzeFetchNceTce<TYPE_OCMB_CHIP>( i_chip, io_sc ); + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchNceTce ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[14] - Mainline UE. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeFetchUe( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + MemEcc::analyzeFetchUe<TYPE_OCMB_CHIP>( i_chip, io_sc ); + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchUe ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[17] - Mainline read IUE. + * @param i_chip OCMB chip. 
+ * @param io_sc The step code data struct. + * @return PRD_NO_CLEAR_FIR_BITS if IUE threshold is reached, else SUCCESS. + */ +int32_t AnalyzeMainlineIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + int32_t rc = SUCCESS; + MemEcc::analyzeMainlineIue<TYPE_OCMB_CHIP>( i_chip, io_sc ); + + #ifdef __HOSTBOOT_MODULE + + if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) ) + rc = PRD_NO_CLEAR_FIR_BITS; + + #endif + + return rc; // PRD_NO_CLEAR_FIR_BITS at IUE threshold (Hostboot only), else SUCCESS +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMainlineIue ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[37] - Maint IUE. + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return PRD_NO_CLEAR_FIR_BITS if IUE threshold is reached, else SUCCESS. + */ +int32_t AnalyzeMaintIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + int32_t rc = SUCCESS; + MemEcc::analyzeMaintIue<TYPE_OCMB_CHIP>( i_chip, io_sc ); + + #ifdef __HOSTBOOT_MODULE + + if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) ) + rc = PRD_NO_CLEAR_FIR_BITS; + + #endif + + return rc; // PRD_NO_CLEAR_FIR_BITS at IUE threshold (Hostboot only), else SUCCESS +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMaintIue ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[19,39] - Mainline and Maint IMPE + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + MemEcc::analyzeImpe<TYPE_OCMB_CHIP>( i_chip, io_sc ); + return SUCCESS; // nothing to return to rule code +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeImpe ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[13,16] - Mainline AUE and IAUE + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +int32_t AnalyzeFetchAueIaue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::AnalyzeFetchAueIaue] " + + MemAddr addr; + if ( SUCCESS != getMemReadAddr<TYPE_OCMB_CHIP>(i_chip, + MemAddr::READ_AUE_ADDR, + addr) ) + { + PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x,READ_AUE_ADDR) failed", + i_chip->getHuid() ); + } + else + { + MemRank rank = addr.getRank(); + MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm, MRU_HIGH ); + } + + return SUCCESS; // nothing to return to rule code + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchAueIaue ); + +//------------------------------------------------------------------------------ + +/** + * @brief RDFFIR[33] - Maintenance AUE + * @param i_chip OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeMaintAue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::AnalyzeMaintAue] " + + MemAddr addr; + if ( SUCCESS != getMemMaintAddr<TYPE_OCMB_CHIP>(i_chip, addr) ) + { + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + i_chip->getHuid() ); + } + else + { + MemRank rank = addr.getRank(); + MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm, MRU_HIGH ); + } + + return SUCCESS; // nothing to return to rule code + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMaintAue ); + + +//############################################################################## +// +// TLXFIR +// +//############################################################################## + +/** + * @brief Clear/Mask TLXFIR[9] + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS
+ */
+int32_t clearAndMaskTlxtRe( ExtensibleChip * i_chip,
+                            STEP_CODE_DATA_STRUCT & io_sc )
+{
+    #define PRDF_FUNC "[explorer_ocmb::clearAndMaskTlxtRe] "
+
+    // Set only when the mask write below was attempted and did not succeed.
+    bool maskWriteFailed = false;
+
+    // Once the error is at threshold, TLXFIR[9] is masked first.
+    if ( io_sc.service_data->IsAtThreshold() )
+    {
+        SCAN_COMM_REGISTER_CLASS * maskOrReg =
+                                    i_chip->getRegister( "TLXFIR_MASK_OR" );
+        maskOrReg->SetBit(9);
+
+        maskWriteFailed = ( SUCCESS != maskOrReg->Write() );
+        if ( maskWriteFailed )
+        {
+            PRDF_ERR( PRDF_FUNC "Write() Failed on TLXFIR_MASK_OR: "
+                      "i_chip=0x%08x", i_chip->getHuid() );
+        }
+    }
+
+    // The clear is skipped whenever the mask write failed.
+    if ( !maskWriteFailed )
+    {
+        // Write TLXFIR_AND with every bit set except bit 9.
+        SCAN_COMM_REGISTER_CLASS * firAndReg =
+                                        i_chip->getRegister( "TLXFIR_AND" );
+        firAndReg->setAllBits();
+        firAndReg->ClearBit(9);
+
+        if ( SUCCESS != firAndReg->Write() )
+        {
+            PRDF_ERR( PRDF_FUNC "Write() Failed on TLXFIR_AND: "
+                      "i_chip=0x%08x", i_chip->getHuid() );
+        }
+    }
+
+    // Register write failures are only traced; SUCCESS is always returned.
+    return SUCCESS;
+
+    #undef PRDF_FUNC
+}
+PRDF_PLUGIN_DEFINE( explorer_ocmb, clearAndMaskTlxtRe );
+
+} // end namespace explorer_ocmb
+
+} // end namespace PRDF
+
diff --git a/src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk b/src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk
new file mode 100644
index 000000000..b79d5cc30
--- /dev/null
+++ b/src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk
@@ -0,0 +1,39 @@
+# IBM_PROLOG_BEGIN_TAG
+# This is an automatically generated prolog.
+#
+# $Source: src/usr/diag/prdf/common/plat/explorer/prdf_plat_explorer.mk $
+#
+# OpenPOWER HostBoot Project
+#
+# Contributors Listed Below - COPYRIGHT 2019
+# [+] International Business Machines Corp.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# IBM_PROLOG_END_TAG + +################################################################################ +# Paths common to both FSP and Hostboot +################################################################################ + +prd_vpath += ${PRD_SRC_PATH}/common/plat/explorer + +prd_incpath += ${PRD_SRC_PATH}/common/plat/explorer + +################################################################################ +# Object files common to both FSP and Hostboot +################################################################################ + +# rule plugin related +prd_rule_plugin += prdfExplorerPlugins_common.o diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C index 1227afeb8..654b39ba0 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -46,8 +46,8 @@ using namespace PlatServices; // Class MemAddr //------------------------------------------------------------------------------ -template<> -MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ) +template<TARGETING::TYPE T> +MemAddr MemAddr::fromReadAddr( uint64_t i_addr ) { uint64_t mrnk = (i_addr >> 59) & 0x7; // 2: 4 uint64_t srnk = (i_addr >> 56) & 0x7; // 5: 7 @@ -58,6 +58,12 @@ MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } +template +MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ); +template +MemAddr MemAddr::fromReadAddr<TYPE_OCMB_CHIP>( uint64_t i_addr ); + + template<> MemAddr MemAddr::fromReadAddr<TYPE_MEMBUF>( uint64_t i_addr ) { @@ -73,8 +79,8 @@ MemAddr MemAddr::fromReadAddr<TYPE_MEMBUF>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } -template<> -MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ) +template<TARGETING::TYPE T> +MemAddr MemAddr::fromMaintAddr( uint64_t i_addr ) { uint64_t rslct = (i_addr >> 59) & 0x3; // 3: 4 uint64_t srnk = (i_addr >> 56) & 0x7; // 5: 7 @@ -88,6 +94,12 @@ MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } +template +MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ); +template +MemAddr MemAddr::fromMaintAddr<TYPE_OCMB_CHIP>( uint64_t i_addr ); + + template<> MemAddr MemAddr::fromMaintAddr<TYPE_MBA>( uint64_t i_addr ) { @@ -169,6 +181,53 @@ uint32_t getMemReadAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, uint32_t i_pos, //------------------------------------------------------------------------------ template<> +uint32_t getMemReadAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + MemAddr::ReadReg i_reg, + MemAddr & o_addr ) +{ + #define PRDF_FUNC "[getMemReadAddr<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + // Check parameters + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( 
TYPE_OCMB_CHIP == i_chip->getType() ); + + // Get the register string. + const char * reg_str = ""; + switch ( i_reg ) + { + case MemAddr::READ_NCE_ADDR: reg_str = "MBNCER"; break; + case MemAddr::READ_RCE_ADDR: reg_str = "MBRCER"; break; + case MemAddr::READ_MPE_ADDR: reg_str = "MBMPER"; break; + case MemAddr::READ_UE_ADDR : reg_str = "MBUER" ; break; + case MemAddr::READ_AUE_ADDR: reg_str = "MBAUER"; break; + default: PRDF_ASSERT( false ); + } + + // Read the address register + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( reg_str ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on %s: i_chip=0x%08x", + reg_str, i_chip->getHuid() ); + } + else + { + // Get the address object. + uint64_t addr = reg->GetBitFieldJustified( 0, 64 ); + o_addr = MemAddr::fromReadAddr<TYPE_OCMB_CHIP>( addr ); + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> uint32_t getMemReadAddr<TYPE_MEMBUF>( ExtensibleChip * i_chip, uint32_t i_pos, MemAddr::ReadReg i_reg, MemAddr & o_addr ) { @@ -247,15 +306,14 @@ uint32_t getMemReadAddr<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ -template<> -uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, - MemAddr & o_addr ) +template<TARGETING::TYPE T> +uint32_t getMemMaintAddr( ExtensibleChip * i_chip, MemAddr & o_addr ) { - #define PRDF_FUNC "[getMemMaintAddr<TYPE_MCBIST>] " + #define PRDF_FUNC "[getMemMaintAddr<T>] " // Check parameters PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCBIST == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); // Read the address register SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MCBMCAT" ); @@ -269,7 +327,7 @@ uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, { // Get the address object. 
uint64_t addr = reg->GetBitFieldJustified( 0, 64 ); - o_addr = MemAddr::fromMaintAddr<TYPE_MCBIST>( addr ); + o_addr = MemAddr::fromMaintAddr<T>( addr ); } return o_rc; @@ -277,6 +335,13 @@ uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, + MemAddr & o_addr ); +template +uint32_t getMemMaintAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + MemAddr & o_addr ); + //------------------------------------------------------------------------------ template<> @@ -389,8 +454,9 @@ uint32_t getMemMaintEndAddr<TYPE_MBA>( ExtensibleChip * i_chip, #ifdef __HOSTBOOT_MODULE -uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, - std::vector<ExtensibleChip *> & o_mcaList ) +template<> +uint32_t getMcbistMaintPort<TYPE_MCBIST>( ExtensibleChip * i_mcbChip, + ExtensibleChipList & o_mcaList ) { #define PRDF_FUNC "[getMcbistMaintPort] " @@ -402,9 +468,9 @@ uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, o_mcaList.clear(); - SCAN_COMM_REGISTER_CLASS * mcbagra = i_mcbChip->getRegister( "MCBAGRA" ); - SCAN_COMM_REGISTER_CLASS * mcbmcat = i_mcbChip->getRegister( "MCBMCAT" ); - SCAN_COMM_REGISTER_CLASS * mcb_cntl = i_mcbChip->getRegister( "MCB_CNTL" ); + SCAN_COMM_REGISTER_CLASS * mcbagra = i_mcbChip->getRegister( "MCBAGRA" ); + SCAN_COMM_REGISTER_CLASS * mcbmcat = i_mcbChip->getRegister( "MCBMCAT" ); + SCAN_COMM_REGISTER_CLASS * mcb_cntl = i_mcbChip->getRegister( "MCB_CNTL" ); do { @@ -446,7 +512,7 @@ uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, } // Get MCAs from all targeted ports. 
- for ( uint8_t p = 0; p < 4; p++ ) + for ( uint8_t p = 0; p < MAX_MCA_PER_MCBIST; p++ ) { if ( 0 == (portMask & (0x8 >> p)) ) continue; diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H index 8dc192672..f5120b3b5 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -167,7 +167,7 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, uint32_t i_pos, /** * @brief Reads the specified mainline memory read address from hardware. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_reg The target address register. * @param o_addr The returned address from hardware. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. @@ -189,7 +189,7 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, MemAddr::ReadReg i_reg, * mode or not. Therefore, users must call getMcbistMaintPort() to get the port * information. * - * @param i_chip An MBA or MCBIST chip. + * @param i_chip An MBA, MCBIST, or OCMB chip. * @param o_addr The returned address from hardware. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -228,11 +228,12 @@ uint32_t getMemMaintEndAddr( ExtensibleChip * i_chip, MemAddr & o_addr ); * * @note Only supported for MCBIST. * @param i_mcbChip An MCBIST chip. - * @param o_mcaList A list of all MCAs targeted by the command. + * @param o_portList A list of all MCAs targeted by the command. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
*/ +template<TARGETING::TYPE T> uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, - std::vector<ExtensibleChip *> & o_mcaList ); + ExtensibleChipList & o_portList ); #endif diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C b/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C index ebef7ae29..4d55c7c50 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C @@ -39,6 +39,7 @@ #include <prdfCenMbaDataBundle.H> #include <prdfPlatServices.H> #include <prdfP9McaDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemRowRepair.H> @@ -65,8 +66,16 @@ void addExtMemMruData( const MemoryMru & i_memMru, errlHndl_t io_errl ) { TargetHandle_t trgt = i_memMru.getTrgt(); - // Get the DRAM width. - extMemMru.isX4Dram = isDramWidthX4( trgt ) ? 1 : 0; + if ( TYPE_OCMB_CHIP == getTargetType(trgt) ) + { + TargetHandle_t dimm = getConnectedDimm( trgt, i_memMru.getRank() ); + extMemMru.isX4Dram = isDramWidthX4( dimm ) ? 1 : 0; + } + else + { + // Get the DRAM width. + extMemMru.isX4Dram = isDramWidthX4( trgt ) ? 1 : 0; + } // Get the DIMM type. 
if ( TYPE_MBA == getTargetType(trgt) ) @@ -97,9 +106,9 @@ void addExtMemMruData( const MemoryMru & i_memMru, errlHndl_t io_errl ) { getDimmDqAttr<TYPE_DIMM>(partList[0], extMemMru.dqMapping); } - else if ( TYPE_MEM_PORT == getTargetType(trgt) ) + else if ( TYPE_OCMB_CHIP == getTargetType(trgt) ) { - getDimmDqAttr<TYPE_MEM_PORT>( trgt, extMemMru.dqMapping ); + getDimmDqAttr<TYPE_OCMB_CHIP>( trgt, extMemMru.dqMapping ); } else { @@ -172,7 +181,6 @@ void captureDramRepairsData( TARGETING::TargetHandle_t i_trgt, if( CEN_VPD_DIMM_SPARE_NO_SPARE != spareConfig ) data.header.isSpareDram = true; - // Iterate all ranks to get DRAM repair data for ( auto & rank : masterRanks ) { @@ -220,8 +228,11 @@ void captureDramRepairsData( TARGETING::TargetHandle_t i_trgt, if ( data.rankDataList.size() > 0 ) { data.header.rankCount = data.rankDataList.size(); - data.header.isEccSp = ( isDramWidthX4( i_trgt ) && - (TYPE_MBA == getTargetType(i_trgt)) ); + data.header.isEccSp = false; + if ( TYPE_MBA == getTargetType(i_trgt) ) + { + data.header.isEccSp = isDramWidthX4( i_trgt ); + } UtilMem dramStream; dramStream << data; @@ -459,6 +470,33 @@ void captureIueCounts<McaDataBundle*>( TARGETING::TargetHandle_t i_trgt, //------------------------------------------------------------------------------ template<> +void captureIueCounts<OcmbDataBundle*>( TARGETING::TargetHandle_t i_trgt, + OcmbDataBundle * i_db, + CaptureData & io_cd ) +{ + #ifdef __HOSTBOOT_MODULE + + uint8_t sz_capData = i_db->iv_iueTh.size()*2; + uint8_t capData[sz_capData] = {}; + uint8_t idx = 0; + + for ( auto & th_pair : i_db->iv_iueTh ) + { + capData[idx] = th_pair.first; + capData[idx+1] = th_pair.second.getCount(); + idx += 2; + } + + // Add data to capture data. 
+ BitString bs ( sz_capData*8, (CPU_WORD *) &capData ); + io_cd.Add( i_trgt, Util::hashString("IUE_COUNTS"), bs ); + + #endif +} + +//------------------------------------------------------------------------------ + +template<> void addEccData<TYPE_MCA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -497,6 +535,33 @@ void addEccData<TYPE_MCBIST>( ExtensibleChip * i_chip, } template<> +void addEccData<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + CaptureData & cd = io_sc.service_data->GetCaptureData(); + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + TargetHandle_t ocmbTrgt = i_chip->getTrgt(); + + // Add DRAM repairs data from hardware. + captureDramRepairsData<TYPE_OCMB_CHIP>( ocmbTrgt, cd ); + + // Add DRAM repairs data from VPD. + captureDramRepairsVpd<TYPE_OCMB_CHIP>( ocmbTrgt, cd ); + + // Add IUE counts to capture data. + captureIueCounts<OcmbDataBundle*>( ocmbTrgt, db, cd ); + + // Add CE table to capture data. + db->iv_ceTable.addCapData( cd ); + + // Add UE table to capture data. + db->iv_ueTable.addCapData( cd ); +} + +template<> void addEccData<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -558,6 +623,22 @@ void addEccData<TYPE_MBA>( TargetHandle_t i_trgt, errlHndl_t io_errl ) ErrDataService::AddCapData( cd, io_errl ); } +template<> +void addEccData<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + errlHndl_t io_errl ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_trgt) ); + + CaptureData cd; + + // Add DRAM repairs data from hardware. + captureDramRepairsData<TYPE_OCMB_CHIP>( i_trgt, cd ); + + // Add DRAM repairs data from VPD. 
+ captureDramRepairsVpd<TYPE_OCMB_CHIP>( i_trgt, cd ); + + ErrDataService::AddCapData( cd, io_errl ); +} //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C b/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C index 16645586b..799e32e67 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C @@ -281,7 +281,7 @@ void MemCeTable<T>::addCapData( CaptureData & io_cd ) // Avoid linker errors with the template. template class MemCeTable<TYPE_MCA>; template class MemCeTable<TYPE_MBA>; -template class MemCeTable<TYPE_MEM_PORT>; +template class MemCeTable<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H index 7605a82fa..80586976e 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -28,6 +28,8 @@ #include <prdfCenMbaDataBundle.H> #include <prdfP9McaDataBundle.H> +#include <prdfOcmbDataBundle.H> +#include <prdfTargetServices.H> namespace PRDF { @@ -62,6 +64,16 @@ uint32_t addCeTableEntry<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +uint32_t addCeTableEntry<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + const MemSymbol & i_symbol, + bool i_isHard ) +{ + return getOcmbDataBundle(i_chip)->iv_ceTable.addEntry( i_addr, i_symbol, + i_isHard ); +} + +template<> inline uint32_t addCeTableEntry<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, const MemSymbol & i_symbol, @@ -91,6 +103,14 @@ void addUeTableEntry<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +void addUeTableEntry<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + UE_TABLE::Type i_type, + const MemAddr & i_addr ) +{ + getOcmbDataBundle(i_chip)->iv_ueTable.addEntry( i_type, i_addr ); +} + +template<> inline void addUeTableEntry<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, UE_TABLE::Type i_type, const MemAddr & i_addr ) @@ -118,6 +138,14 @@ void resetEccFfdc<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +void resetEccFfdc<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + AddrRangeType i_type ) +{ + getOcmbDataBundle(i_chip)->iv_ceTable.deactivateRank( i_rank, i_type ); +} + +template<> inline void resetEccFfdc<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, AddrRangeType i_type ) @@ -134,7 +162,7 @@ void resetEccFfdc<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, /** * @brief Generic wrapper to push a TdEntry to the Targeted Diagnostics queue. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or MEM_PORT. * @param i_entry The new TdEntry. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
*/ @@ -155,6 +183,13 @@ void pushToQueue<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, getMbaDataBundle(i_chip)->getTdCtlr()->pushToQueue( i_entry ); } +template<> inline +void pushToQueue<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + TdEntry * i_entry ) +{ + getOcmbDataBundle(i_chip)->getTdCtlr()->pushToQueue( i_entry ); +} + #endif // Hostboot IPL/Runtime //############################################################################## @@ -179,6 +214,13 @@ MemIplCeStats<TARGETING::TYPE_MCA> * getIplCeStats( ExtensibleChip * i_chip ) } template<> inline +MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * getIplCeStats( + ExtensibleChip * i_chip ) +{ + return getOcmbDataBundle(i_chip)->getIplCeStats(); +} + +template<> inline MemIplCeStats<TARGETING::TYPE_MBA> * getIplCeStats( ExtensibleChip * i_chip ) { return getMbaDataBundle(i_chip)->getIplCeStats(); @@ -211,6 +253,13 @@ uint32_t handleTdEvent<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +uint32_t handleTdEvent<TARGETING::TYPE_OCMB_CHIP>(ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc) +{ + return getOcmbDataBundle(i_chip)->getTdCtlr()->handleTdEvent( io_sc ); +} + +template<> inline uint32_t handleTdEvent<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -242,6 +291,16 @@ void banTps<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, getMbaDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank ); } +template<> inline +void banTps<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ) +{ + // Ban TPS on this rank. + getOcmbDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank ); + // Permanently mask mainline NCEs and TCEs because of the TPS ban. 
+ getOcmbDataBundle(i_chip)->iv_maskMainlineNceTce = true; +} + #endif // Hostboot Runtime only } // end namespace MemDbUtils diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C index 308e25dab..5db522818 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C @@ -450,6 +450,9 @@ std::vector<MemSymbol> MemDqBitmap::getSymbolList( uint8_t i_portSlct ) case TYPE_MEM_PORT: symbol = dq2Symbol<TYPE_MEM_PORT>( dq, i_portSlct ); break; + case TYPE_OCMB_CHIP: + symbol = dq2Symbol<TYPE_OCMB_CHIP>(dq, i_portSlct); + break; default: PRDF_ERR( "Invalid trgt type" ); PRDF_ASSERT( false ); @@ -700,7 +703,7 @@ uint32_t MemDqBitmap::setEccSpare( uint8_t i_pins ) // Utility Functions //############################################################################## -uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t setDramInVpd( TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ) { #define PRDF_FUNC "[MemDqBitmap::__setDramInVpd] " @@ -709,14 +712,12 @@ uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, do { - TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - MemDqBitmap dqBitmap; - o_rc = getBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = getBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } @@ -727,11 +728,11 @@ uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, break; } - o_rc = setBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } }while(0); @@ -743,7 +744,7 @@ uint32_t setDramInVpd( 
ExtensibleChip * i_chip, const MemRank & i_rank, //------------------------------------------------------------------------------ -uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t clearDramInVpd( TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ) { #define PRDF_FUNC "[MemDqBitmap::__clearDramInVpd] " @@ -752,14 +753,12 @@ uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, do { - TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - MemDqBitmap dqBitmap; - o_rc = getBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = getBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } @@ -770,11 +769,11 @@ uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, break; } - o_rc = setBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } }while(0); diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H index b407d9835..c3648dbc5 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H @@ -73,7 +73,22 @@ class MemDqBitmap /** @brief Constructor from components */ MemDqBitmap( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, BitmapData i_d ) : iv_trgt(i_trgt), iv_rank(i_rank), - iv_x4Dram(PlatServices::isDramWidthX4(i_trgt)), iv_data(i_d){} + iv_x4Dram(true), iv_data(i_d) + { + if ( TARGETING::TYPE_MEM_PORT == PlatServices::getTargetType(iv_trgt) || + TARGETING::TYPE_OCMB_CHIP == + PlatServices::getTargetType(iv_trgt) ) + { + // TODO RTC 210072 - Support multiple ports + TARGETING::TargetHandle_t dimm 
= + PlatServices::getConnectedDimm( iv_trgt, iv_rank ); + iv_x4Dram = PlatServices::isDramWidthX4( dimm ); + } + else + { + iv_x4Dram = PlatServices::isDramWidthX4( iv_trgt ); + } + } public: // functions @@ -224,7 +239,7 @@ class MemDqBitmap private: // instance variables - TARGETING::TargetHandle_t iv_trgt; ///< Target MBA/MCA/MEM_PORT + TARGETING::TargetHandle_t iv_trgt; ///< Target MBA/MCA/MEM_PORT/OCMB_CHIP MemRank iv_rank; ///< Target rank bool iv_x4Dram; ///< TRUE if iv_trgt uses x4 DRAMs @@ -238,20 +253,21 @@ class MemDqBitmap /** * @brief Sets the inputted dram in DRAM repairs VPD. - * @param i_chip MBA or MCA chip. + * @param i_trgt MBA, MCA, MEM_PORT, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ -uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t setDramInVpd( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ); /** * @brief Clears the inputted dram in DRAM repairs VPD. - * @param i_chip MBA or MCA chip. + * @param i_trgt MBA, MCA, MEM_PORT, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. 
*/ -uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t clearDramInVpd( TARGETING::TargetHandle_t i_trgt, + const MemRank & i_rank, MemSymbol i_symbol ); } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C index 9869a8c08..f206a074e 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C @@ -127,6 +127,87 @@ uint32_t handleMemUe<TYPE_MCA>( ExtensibleChip * i_chip, const MemAddr & i_addr, i_chip->getHuid(), i_type ); break; } + + #ifdef __HOSTBOOT_RUNTIME + // Increment the UE counter and store the rank we're on, resetting + // the UE and CE counts if we have stopped on a new rank. + ExtensibleChip * mcb = getConnectedParent( i_chip, TYPE_MCBIST ); + McbistDataBundle * mcbdb = getMcbistDataBundle(mcb); + if ( mcbdb->iv_ceUeRank != i_addr.getRank() ) + { + mcbdb->iv_ceStopCounter.reset(); + mcbdb->iv_ueStopCounter.reset(); + } + mcbdb->iv_ueStopCounter.inc( io_sc ); + mcbdb->iv_ceUeRank = i_addr.getRank(); + #endif + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> +uint32_t handleMemUe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + UE_TABLE::Type i_type, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemEcc::handleMemUe<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // First check to see if this is a side-effect UE. + SCAN_COMM_REGISTER_CLASS * fir = i_chip->getRegister("OCMB_LFIR"); + o_rc = fir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on OCMB_LFIR: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + // Check OCMB_LFIR[38] to determine if this is a side-effect. + if ( fir->IsBitSet(38) ) + { + // This is a side-effect. Callout the OCMB. 
+ PRDF_TRAC( PRDF_FUNC "Memory UE is side-effect of DDRPHY error" ); + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + io_sc.service_data->setServiceCall(); + } + else + { + // Handle the memory UE. + o_rc = __handleMemUe<TYPE_OCMB_CHIP>( i_chip, i_addr, i_type, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__handleMemUe(0x%08x,%d) failed", + i_chip->getHuid(), i_type ); + break; + } + + #ifdef __HOSTBOOT_RUNTIME + // Increment the UE counter and store the rank we're on, resetting + // the UE and CE counts if we have stopped on a new rank. + OcmbDataBundle * ocmbdb = getOcmbDataBundle(i_chip); + if ( ocmbdb->iv_ceUeRank != i_addr.getRank() ) + { + ocmbdb->iv_ceStopCounter.reset(); + ocmbdb->iv_ueStopCounter.reset(); + } + ocmbdb->iv_ueStopCounter.inc( io_sc ); + ocmbdb->iv_ceUeRank = i_addr.getRank(); + #endif + } } while (0); @@ -328,6 +409,52 @@ uint32_t maskMemPort<TYPE_MCA>( ExtensibleChip * i_chip ) #undef PRDF_FUNC } +template<> +uint32_t maskMemPort<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +{ + #define PRDF_FUNC "[MemEcc::maskMemPort<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // Mask all FIRs on the OCMB in the chiplet FIRs. + SCAN_COMM_REGISTER_CLASS * chipletMask = + i_chip->getRegister("OCMB_CHIPLET_FIR_MASK"); + SCAN_COMM_REGISTER_CLASS * chipletSpaMask = + i_chip->getRegister("OCMB_CHIPLET_SPA_FIR_MASK"); + + chipletMask->setAllBits(); + chipletSpaMask->setAllBits(); + + o_rc = chipletMask->Write() | chipletSpaMask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on 0x%08x", i_chip->getHuid() ); + break; + } + + #ifdef __HOSTBOOT_RUNTIME + + // Dynamically deallocate the port. 
+ if ( SUCCESS != MemDealloc::port<TYPE_OCMB_CHIP>( i_chip ) ) + { + PRDF_ERR( PRDF_FUNC "MemDealloc::port<TYPE_OCMB_CHIP>(0x%08x) " + "failed", i_chip->getHuid() ); + } + + #endif + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + #endif // __HOSTBOOT_MODULE //------------------------------------------------------------------------------ @@ -390,6 +517,62 @@ uint32_t triggerPortFail<TYPE_MCA>( ExtensibleChip * i_chip ) #undef PRDF_FUNC } +template<> +uint32_t triggerPortFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +{ + #define PRDF_FUNC "[MemEcc::triggerPortFail<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + do + { + // trigger a port fail + // set FARB0[59] - MBA_FARB0Q_CFG_INJECT_PARITY_ERR_CONSTANT and + // FARB0[40] - MBA_FARB0Q_CFG_INJECT_PARITY_ERR_ADDR5 + SCAN_COMM_REGISTER_CLASS * farb0 = i_chip->getRegister("FARB0"); + + o_rc = farb0->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() FARB0 failed: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + farb0->SetBit(59); + farb0->SetBit(40); + + o_rc = farb0->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() FARB0 failed: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + // reset thresholds to prevent issuing multiple port failures on + // the same port + for ( auto & resetTh : db->iv_iueTh ) + { + resetTh.second.reset(); + } + + db->iv_iuePortFail = true; + + break; + }while(0); + + + return o_rc; + + #undef PRDF_FUNC +} + #endif // __HOSTBOOT_RUNTIME //------------------------------------------------------------------------------ @@ -420,6 +603,30 @@ bool queryIueTh<TYPE_MCA>( ExtensibleChip * i_chip, return iueAtTh; } +template<> +bool queryIueTh<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == 
i_chip->getType() ); + + bool iueAtTh = false; + + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + // Loop through all our thresholds + for ( auto & th : db->iv_iueTh ) + { + // If threshold reached + if ( th.second.thReached(io_sc) ) + { + iueAtTh = true; + } + } + + return iueAtTh; +} + #endif //------------------------------------------------------------------------------ @@ -493,6 +700,11 @@ template uint32_t handleMpe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, UE_TABLE::Type i_type, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t handleMpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + UE_TABLE::Type i_type, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -581,6 +793,10 @@ template uint32_t analyzeFetchMpe<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchMpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -794,6 +1010,9 @@ uint32_t analyzeFetchNceTce<TYPE_MCA>( ExtensibleChip * i_chip, template uint32_t analyzeFetchNceTce<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchNceTce<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -871,6 +1090,9 @@ uint32_t analyzeFetchUe<TYPE_MCA>( ExtensibleChip * i_chip, template uint32_t analyzeFetchUe<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchUe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -955,16 +1177,97 @@ uint32_t handleMemIue<TYPE_MCA>( ExtensibleChip * 
i_chip, #undef PRDF_FUNC } -//------------------------------------------------------------------------------ - template<> -uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, +uint32_t handleMemIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ) { + #define PRDF_FUNC "[MemEcc::handleMemIue] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + // Add the DIMM to the callout list. + MemoryMru mm { i_chip->getTrgt(), i_rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm ); + + #ifdef __HOSTBOOT_MODULE + + do + { + // Nothing else to do if handling a system checkstop. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) break; + + // Get the data bundle from chip. + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + // If we have already caused a port fail, mask the IUE bits. + if ( true == db->iv_iuePortFail ) + { + SCAN_COMM_REGISTER_CLASS * mask_or = + i_chip->getRegister("RDFFIR_MASK_OR"); + + mask_or->SetBit(17); + mask_or->SetBit(37); + + o_rc = mask_or->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on 0x%08x", + i_chip->getHuid() ); + break; + } + } + + // Get the DIMM select. + uint8_t ds = i_rank.getDimmSlct(); + + // Initialize threshold if it doesn't exist yet. + if ( 0 == db->iv_iueTh.count(ds) ) + { + db->iv_iueTh[ds] = TimeBasedThreshold( getIueTh() ); + } + + // Increment the count and check if at threshold. + if ( db->iv_iueTh[ds].inc(io_sc) ) + { + // Make the error log predictive. + io_sc.service_data->setServiceCall(); + + // The port fail will be triggered in the PostAnalysis plugin after + // the error log has been committed. + + // Mask off the entire port to avoid collateral. 
+ o_rc = MemEcc::maskMemPort<TYPE_OCMB_CHIP>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "MemEcc::maskMemPort(0x%08x) failed", + i_chip->getHuid() ); + break; + } + } + + } while (0); + + #endif // __HOSTBOOT_MODULE + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +uint32_t analyzeMainlineIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ #define PRDF_FUNC "[MemEcc::analyzeMainlineIue] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -974,7 +1277,7 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, // not likely that we will have two independent failure modes at the // same time. So we just assume the address is correct. MemAddr addr; - o_rc = getMemReadAddr<TYPE_MCA>( i_chip, MemAddr::READ_RCE_ADDR, addr ); + o_rc = getMemReadAddr<T>( i_chip, MemAddr::READ_RCE_ADDR, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x, READ_RCE_ADDR) failed", @@ -983,7 +1286,7 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, } MemRank rank = addr.getRank(); - o_rc = handleMemIue<TYPE_MCA>( i_chip, rank, io_sc ); + o_rc = handleMemIue<T>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,m%ds%d) failed", @@ -998,16 +1301,23 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeMainlineIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ -template<> -uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & io_sc ) +template<TARGETING::TYPE T> 
+uint32_t analyzeMaintIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemEcc::analyzeMaintIue] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -1015,7 +1325,7 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, { // Use the current address in the MCBMCAT. MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MCA>( i_chip, addr ); + o_rc = getMemMaintAddr<T>( i_chip, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -1024,7 +1334,7 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, } MemRank rank = addr.getRank(); - o_rc = handleMemIue<TYPE_MCA>( i_chip, rank, io_sc ); + o_rc = handleMemIue<T>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,m%ds%d) failed", @@ -1039,6 +1349,13 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeMaintIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ template<> @@ -1152,6 +1469,117 @@ uint32_t analyzeImpe<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t analyzeImpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + + #define PRDF_FUNC "[MemEcc::analyzeImpe] " + + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // get the mark shadow register + SCAN_COMM_REGISTER_CLASS * msr = i_chip->getRegister("EXP_MSR"); + + o_rc = msr->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on EXP_MSR: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + TargetHandle_t trgt = i_chip->getTrgt(); + + // get galois 
field code - bits 8:15 of MSR + uint8_t galois = msr->GetBitFieldJustified( 8, 8 ); + + // get rank - bits 16:18 of MSR + uint8_t mrnk = msr->GetBitFieldJustified( 16, 3 ); + MemRank rank( mrnk ); + + // get symbol and DRAM + MemSymbol symbol = MemSymbol::fromGalois( trgt, rank, galois ); + if ( !symbol.isValid() ) + { + PRDF_ERR( PRDF_FUNC "Galois 0x%02x from EXP_MSR is invalid: 0x%08x," + "0x%02x", galois, i_chip->getHuid(), rank.getKey() ); + o_rc = FAIL; + break; + } + + // Add the DIMM to the callout list + MemoryMru memmru( trgt, rank, MemoryMruData::CALLOUT_RANK ); + io_sc.service_data->SetCallout( memmru ); + + #ifdef __HOSTBOOT_MODULE + // get data bundle from chip + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + uint8_t dram = symbol.getDram(); + + // Increment the count and check threshold. + if ( db->getImpeThresholdCounter()->inc(rank, dram, io_sc) ) + { + // Make the error log predictive if DRAM Repairs are disabled or if + // the number of DRAMs on this rank with IMPEs has reached threshold + if ( areDramRepairsDisabled() || + db->getImpeThresholdCounter()->queryDrams(rank, dram, io_sc) ) + { + io_sc.service_data->setServiceCall(); + } + else // Otherwise, place a chip mark on the failing DRAM. + { + MemMark chipMark( trgt, rank, galois ); + o_rc = MarkStore::writeChipMark<TYPE_OCMB_CHIP>( i_chip, rank, + chipMark ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + + o_rc = MarkStore::chipMarkCleanup<TYPE_OCMB_CHIP>( i_chip, rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "chipMarkCleanup(0x%08x,0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + } + } + + // If a predictive callout is made, mask both mainline and maintenance + // attentions. 
+ if ( io_sc.service_data->queryServiceCall() ) + { + SCAN_COMM_REGISTER_CLASS * mask + = i_chip->getRegister( "RDFFIR_MASK_OR" ); + mask->SetBit(19); // mainline + mask->SetBit(39); // maintenance + o_rc = mask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_MASK_OR: " + "0x%08x", i_chip->getHuid() ); + break; + } + } + #endif // __HOSTBOOT_MODULE + + } while (0); + + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<> diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H index 735ae436f..0fd71dd8b 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -51,7 +51,7 @@ namespace MemEcc * @brief Adds the memory CE to the callout list and CE table. Will also issue * dynamic memory deallocation when appropriate. Returns true if TPS is * required. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_addr Failed address. * @param i_symbol Failed symbol. * @param o_doTps True if TPS is required. False otherwise. @@ -74,7 +74,7 @@ uint32_t handleMemCe( ExtensibleChip * i_chip, const MemAddr & i_addr, * of the DIMMs, the UE table will not be updated and no dynamic memory * deallocation. * - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_addr Failed address. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -96,7 +96,7 @@ uint32_t handleMemUe( ExtensibleChip * i_chip, const MemAddr & i_addr, * the port failure is issued in the PostAnalysis plugin after the error log has * been committed. 
* - * @param i_chip MCA chip. + * @param i_chip MCA or OCMB chip. * @param i_rank Rank containing the IUE. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. @@ -107,7 +107,7 @@ uint32_t handleMemIue( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Handles a MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_addr Failed address. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -119,7 +119,7 @@ uint32_t handleMpe( ExtensibleChip * i_chip, const MemAddr & i_addr, /** * @brief Handles a MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_rank Target rank. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -135,7 +135,7 @@ uint32_t handleMpe( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Analyzes a fetch MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_rank Target rank. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. @@ -146,7 +146,7 @@ uint32_t analyzeFetchMpe( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Analyzes a fetch NCE/TCE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -156,7 +156,7 @@ uint32_t analyzeFetchNceTce( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch UE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -166,7 +166,7 @@ uint32_t analyzeFetchUe( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch mainline IUE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. 
* @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -177,7 +177,7 @@ uint32_t analyzeMainlineIue( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch maint IUE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -187,7 +187,7 @@ uint32_t analyzeMaintIue( ExtensibleChip * i_chip, /** * @brief Analyzes a maint or mainline IMPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -208,7 +208,7 @@ uint32_t analyzeFetchRcePue( ExtensibleChip * i_chip, /** * @brief Will trigger a port fail. - * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise */ template<TARGETING::TYPE T> @@ -221,7 +221,7 @@ uint32_t triggerPortFail( ExtensibleChip * i_chip ); /** * @brief Will query the data bundle and return if the IUE threshold has been * reached. - * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @param io_sc The step code data struct. * @return True if IUE threshold is reached, false if not. */ @@ -231,7 +231,7 @@ bool queryIueTh( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); /** * @brief Will mask off an entire memory port. At runtime will issue dynamic * memory deallocation of the port. 
- * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise */ template<TARGETING::TYPE T> diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H index 08b79922e..7bcf0e573 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H @@ -88,7 +88,7 @@ PRDR_ERROR_SIGNATURE(VttLost, 0xffff0084, "", "NVDIMM VTT Lost"); PRDR_ERROR_SIGNATURE(NotSelfRefr, 0xffff0085, "", "NVDIMM Dram Not Self Refresh"); PRDR_ERROR_SIGNATURE(CtrlHwErr, 0xffff0086, "", "NVDIMM Controller Hardware Error"); PRDR_ERROR_SIGNATURE(NvmCtrlErr, 0xffff0087, "", "NVDIMM NVM Controller Error"); -PRDR_ERROR_SIGNATURE(NvmLifeErr, 0xffff0088, "", "NVDIMM NVM Lifetime Error"); +PRDR_ERROR_SIGNATURE(NvmLifeErr, 0xffff0088, "", "NVDIMM Final NVM Lifetime Error"); PRDR_ERROR_SIGNATURE(InsuffEnergy, 0xffff0089, "", "NVDIMM Not enough energy for CSAVE"); PRDR_ERROR_SIGNATURE(InvFwErr, 0xffff008A, "", "NVDIMM Invalid Firmware Error"); @@ -98,8 +98,22 @@ PRDR_ERROR_SIGNATURE(EsPolNotSet, 0xffff008D, "", "NVDIMM Energy Source Policy PRDR_ERROR_SIGNATURE(EsHwFail, 0xffff008E, "", "NVDIMM Energy Source Hardware Fail"); PRDR_ERROR_SIGNATURE(EsHlthAssess, 0xffff008F, "", "NVDIMM Energy Source Health Assessment Error"); -PRDR_ERROR_SIGNATURE(EsLifeErr, 0xffff0090, "", "NVDIMM Energy Source Lifetime Error"); -PRDR_ERROR_SIGNATURE(EsTmpErr, 0xffff0091, "", "NVDIMM Energy Source Temp Error"); +PRDR_ERROR_SIGNATURE(EsLifeErr, 0xffff0090, "", "NVDIMM Final Energy Source Lifetime Error"); +PRDR_ERROR_SIGNATURE(EsTmpErrHigh, 0xffff0091, "", "NVDIMM Energy Source Temperature Error - High Temp Threshold"); +PRDR_ERROR_SIGNATURE(EsTmpErrLow, 0xffff0092, "", "NVDIMM Energy Source Temperature Error - Low Temp Threshold"); + +PRDR_ERROR_SIGNATURE(NvmLifeWarn1, 0xffff0093, "", "NVDIMM First NVM Lifetime Warning"); 
+PRDR_ERROR_SIGNATURE(NvmLifeWarn2, 0xffff0094, "", "NVDIMM Second NVM Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsLifeWarn1, 0xffff0095, "", "NVDIMM First Energy Source Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsLifeWarn2, 0xffff0096, "", "NVDIMM Second Energy Source Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsTmpWarnHigh, 0xffff0097, "", "NVDIMM Energy Source Temperature Warning - High Temp Threshold"); +PRDR_ERROR_SIGNATURE(EsTmpWarnLow, 0xffff0098, "", "NVDIMM Energy Source Temperature Warning - Low Temp Threshold"); +PRDR_ERROR_SIGNATURE(BelowWarnTh, 0xffff0099, "", "NVDIMM Below Warning Threshold"); +PRDR_ERROR_SIGNATURE(IntNvdimmErr, 0xffff009A, "", "NVDIMM Intermittent error"); +PRDR_ERROR_SIGNATURE(NotifStatErr, 0xffff009B, "", "NVDIMM Set Event Notification Status Error"); +PRDR_ERROR_SIGNATURE(FirEvntGone, 0xffff009C, "", "NVDIMM Event Triggering the FIR no longer present"); +PRDR_ERROR_SIGNATURE(EsTmpWarnFa, 0xffff009D, "", "NVDIMM Energy Source Temperature Warning - False Alarm"); +PRDR_ERROR_SIGNATURE(EsTmpErrFa, 0xffff009E, "", "NVDIMM Energy Source Temperature Error - False Alarm"); #endif // __prdfMemExtraSig_H diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C index 83bff1876..e43d844c4 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C @@ -46,7 +46,7 @@ namespace MarkStore { //############################################################################## -// Utilities to read/write markstore (MCA) +// Utilities to read/write markstore //############################################################################## // - We have the ability to set chip marks via the FWMSx registers, but there @@ -62,15 +62,19 @@ namespace MarkStore // mark per master rank. This matches the P8 behavior. This could be improved // upon later if we have the time, but doubtful. 
// - Summary: -// - Chip marks will use HWMS0-7 registers (0x07010AD0-0x07010AD7). -// - Symbol marks will use FWMS0-7 registers (0x07010AD8-0x07010ADF). +// - Chip marks will use HWMS0-7 registers: +// Nimbus: (0x07010AD0-0x07010AD7) +// Axone: (0x08011C10-0x08011C17) +// - Symbol marks will use FWMS0-7 registers: +// Nimbus: (0x07010AD8-0x07010ADF) +// Axone: (0x08011C18-0x08011C1F) // - Each register maps to master ranks 0-7. -template<> -uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + MemMark & o_mark ) { - #define PRDF_FUNC "[readChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[readChipMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -110,14 +114,21 @@ uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeChipMark<T>] " PRDF_ASSERT( i_mark.isValid() ); @@ -153,13 +164,21 @@ uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + 
//------------------------------------------------------------------------------ -template<> -uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearChipMark<T>] " uint32_t o_rc = SUCCESS; @@ -185,13 +204,20 @@ uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template +uint32_t clearChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //------------------------------------------------------------------------------ -template<> -uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readSymbolMark( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ) { - #define PRDF_FUNC "[readSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[readSymbolMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -247,14 +273,21 @@ uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeSymbolMark<T>] " PRDF_ASSERT( i_mark.isValid() 
); @@ -294,36 +327,47 @@ uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, msName, i_chip->getHuid() ); } - // Nimbus symbol mark performance workaround - // When a symbol mark is placed at runtime - #ifdef __HOSTBOOT_RUNTIME + // Nimbus only symbol mark performance workaround + if ( T == TYPE_MCA ) + { + // When a symbol mark is placed at runtime + #ifdef __HOSTBOOT_RUNTIME - // Trigger WAT logic to 'disable bypass' - // Get the ECC Debug/WAT Control register - SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); + // Trigger WAT logic to 'disable bypass' + // Get the ECC Debug/WAT Control register + SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); - // Set DBGR[8] = 0b1 - dbgr->SetBit( 8 ); - o_rc = dbgr->Write(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", - i_chip->getHuid() ); + // Set DBGR[8] = 0b1 + dbgr->SetBit( 8 ); + o_rc = dbgr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", + i_chip->getHuid() ); + } + #endif } - #endif return o_rc; #undef PRDF_FUNC } +template +uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearSymbolMark<T>] " uint32_t o_rc = SUCCESS; @@ -349,6 +393,13 @@ uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template 
+uint32_t clearSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //############################################################################## // Utilities to read/write markstore (MBA) //############################################################################## @@ -958,7 +1009,7 @@ void __addCallout( ExtensibleChip * i_chip, const MemRank & i_rank, //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, +uint32_t __addRowRepairCallout( TargetHandle_t i_trgt, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -967,7 +1018,7 @@ uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, uint32_t o_rc = SUCCESS; // Get the dimms on this rank on either port. - TargetHandleList dimmList = getConnectedDimms( i_chip->getTrgt(), i_rank ); + TargetHandleList dimmList = getConnectedDimms( i_trgt, i_rank ); // Check for row repairs on each dimm. 
for ( auto const & dimm : dimmList ) @@ -1073,8 +1124,8 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, __addCallout( i_chip, i_rank, ecc, io_sc ); // Add the row repairs to the callout list if they exist - o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( i_chip, i_rank, - io_sc ); + o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( + i_chip->getTrgt(), i_rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " @@ -1136,6 +1187,125 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t __applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + const MemMark & i_chipMark, + const MemMark & i_symMark, + TdEntry * & o_dsdEvent, + bool & o_allRepairsUsed ) +{ + #define PRDF_FUNC "[__applyRasPolicies<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + do + { + const uint8_t ps = i_chipMark.getSymbol().getPortSlct(); + const uint8_t dram = i_chipMark.getSymbol().getDram(); + + TargetHandle_t memPort = getConnectedChild( i_chip->getTrgt(), + TYPE_MEM_PORT, ps ); + + TargetHandle_t dimmTrgt = getConnectedDimm( memPort, i_rank, ps ); + + const bool isX4 = isDramWidthX4( dimmTrgt ); + + // Determine if DRAM sparing is enabled. + bool isEnabled = false; + o_rc = isDramSparingEnabled<TYPE_MEM_PORT>( memPort, i_rank, ps, + isEnabled ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "isDramSparingEnabled() failed." ); + break; + } + + if ( isEnabled ) + { + // Sparing is enabled. Get the current spares in hardware. + MemSymbol sp0, sp1, ecc; + o_rc = mssGetSteerMux<TARGETING::TYPE_OCMB_CHIP>( i_chip->getTrgt(), + i_rank, sp0, sp1, + ecc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed", + i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // Add the spares to the callout list if they exist. 
+ __addCallout( i_chip, i_rank, sp0, io_sc ); + __addCallout( i_chip, i_rank, sp1, io_sc ); + __addCallout( i_chip, i_rank, ecc, io_sc ); + + // Add the row repairs to the callout list if they exist + o_rc = __addRowRepairCallout<TARGETING::TYPE_OCMB_CHIP>( memPort, + i_rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " + "failed.", i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // If the chip mark is on a spare then the spare is bad and hardware + // can not steer it to another DRAM even if one is available (e.g. + // the ECC spare). In this this case, make error log predictive. + if ( ( (0 == ps) && sp0.isValid() && (dram == sp0.getDram()) ) || + ( (1 == ps) && sp1.isValid() && (dram == sp1.getDram()) ) || + ( isX4 && ecc.isValid() && (dram == ecc.getDram()) ) ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_VcmBadSpare ); + break; // Nothing more to do. + } + + // Certain DIMMs may have had spares intentially made unavailable by + // the manufacturer. Check the VPD for available spares. + bool spAvail, eccAvail; + o_rc = isSpareAvailable<TYPE_MEM_PORT>( memPort, i_rank, + ps, spAvail, eccAvail ); + if ( spAvail ) + { + // A spare DRAM is available. + o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark }; + } + else if ( eccAvail ) + { + // The ECC spare is available. + o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark, true }; + } + else + { + // Chip mark is in place and sparing is not possible. + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + } + // There is no DRAM sparing so simply check if both the chip and symbol + // mark have been used. 
+ else if ( i_chipMark.isValid() && i_symMark.isValid() ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1220,6 +1390,9 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank, { io_sc.service_data->setServiceCall(); + // We want to try to avoid garding NVDIMMs, so clear gard for them now. + io_sc.service_data->clearNvdimmMruListGard(); + #ifdef __HOSTBOOT_RUNTIME // No more repairs left so no point doing any more TPS procedures. MemDbUtils::banTps<T>( i_chip, i_rank ); @@ -1241,6 +1414,11 @@ uint32_t applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc, TdEntry * & o_dsdEvent ); +template +uint32_t applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + TdEntry * & o_dsdEvent ); //------------------------------------------------------------------------------ @@ -1290,7 +1468,8 @@ uint32_t chipMarkCleanup( ExtensibleChip * i_chip, const MemRank & i_rank, // Set the chip mark in the DRAM Repairs VPD. 
if ( !areDramRepairsDisabled() ) { - o_rc = setDramInVpd( i_chip, i_rank, chipMark.getSymbol() ); + o_rc = setDramInVpd( i_chip->getTrgt(), i_rank, + chipMark.getSymbol() ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed", @@ -1314,6 +1493,10 @@ template uint32_t chipMarkCleanup<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t chipMarkCleanup<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); #endif // not supported on FSP diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H index 2cd28b8dd..86ffa1dc9 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -57,7 +57,7 @@ class MemMark /** * @brief Constructor from components. - * @param i_trgt MBA or MCA target. + * @param i_trgt MBA, MCA, or OCMB target. * @param i_rank The rank this mark is on. * @param i_galois The Galois field. */ @@ -68,7 +68,7 @@ class MemMark /** * @brief Constructor from components. - * @param i_trgt MBA or MCA target. + * @param i_trgt MBA, MCA, or OCMB target. * @param i_rank The rank this mark is on. * @param i_symbol The symbol representing this mark. */ @@ -112,7 +112,7 @@ namespace MarkStore /** * @brief Reads markstore and returns the chip mark for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param o_mark The returned chip mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. 
@@ -123,7 +123,7 @@ uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Writes a chip mark into markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param i_mark Target chip mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -134,7 +134,7 @@ uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Clear chip mark in markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ @@ -143,7 +143,7 @@ uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank ); /** * @brief Reads markstore and returns the symbol mark for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param o_mark The returned symbol mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -154,7 +154,7 @@ uint32_t readSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Writes a symbol mark into markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param i_mark Target symbol mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -165,7 +165,7 @@ uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Clear symbol mark in markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ @@ -187,7 +187,7 @@ uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank ); * repairs have been used. * - Returns a new DsdEvent if DRAM sparing is available. 
* - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param io_sc The step code data struct. * @param o_dsdEvent A new DsdEvent if DRAM sparing is available. Otherwise, @@ -211,7 +211,7 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank, * - Sets the DRAM in the DRAM Repair VPD if DRAM repairs. * - Adds a DSD procedure to the TD queue if a DRAM spare is available * - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C b/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C index 8ebe6cea8..3ff6cd099 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -126,6 +126,22 @@ uint32_t getRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, o_rowRepair ); } +template<> +uint32_t getRowRepairData<TYPE_MEM_PORT>( TargetHandle_t i_dimm, + const MemRank & i_rank, MemRowRepair & o_rowRepair ) +{ + return __getRowRepairData<TYPE_MEM_PORT, fapi2::TARGET_TYPE_MEM_PORT>( + i_dimm, i_rank, o_rowRepair ); +} + +template<> +uint32_t getRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, MemRowRepair & o_rowRepair ) +{ + return __getRowRepairData<TYPE_OCMB_CHIP, fapi2::TARGET_TYPE_OCMB_CHIP>( + i_dimm, i_rank, o_rowRepair ); +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T, fapi2::TargetType F> @@ -190,34 +206,19 @@ uint32_t setRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, i_rowRepair ); } -//------------------------------------------------------------------------------ - -template<TARGETING::TYPE T> -void __setRowRepairDataHelper( const MemAddr & i_addr, uint32_t & io_tmp ); - template<> -void __setRowRepairDataHelper<TYPE_MBA>( const MemAddr & i_addr, - uint32_t & io_tmp ) +uint32_t setRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, + const MemRowRepair & i_rowRepair ) { - #ifdef __HOSTBOOT_MODULE - - // Bank is stored as MBA "(DDR4): bg1-bg0,b1-b0 (4-bit)" in a MemAddr. - // bank group - 2 bits (bg1-bg0) - io_tmp = ( io_tmp << 2 ) | ( (i_addr.getBank() >> 2) & 0x03 ); - - // bank - 3 bits (b2-b0) - io_tmp = ( io_tmp << 3 ) | ( i_addr.getBank() & 0x03 ); - - // Row is stored as "MBA: r17-r0 (18-bit)" in a MemAddr. 
- // row - 18 bits (r17-r0) - io_tmp = ( io_tmp << 18 ) | ( i_addr.getRow() & 0x0003ffff ); - - #endif // __HOSTBOOT_MODULE + return __setRowRepairData<TYPE_OCMB_CHIP, fapi2::TARGET_TYPE_OCMB_CHIP>( + i_dimm, i_rank, i_rowRepair ); } -template<> -void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, - uint32_t & io_tmp ) +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +void __setRowRepairDataHelper( const MemAddr & i_addr, uint32_t & io_tmp ) { #ifdef __HOSTBOOT_MODULE @@ -242,6 +243,32 @@ void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, #endif // __HOSTBOOT_MODULE } +template +void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, + uint32_t & io_tmp ); +template +void __setRowRepairDataHelper<TYPE_OCMB_CHIP>( const MemAddr & i_addr, + uint32_t & io_tmp ); + +template<> +void __setRowRepairDataHelper<TYPE_MBA>( const MemAddr & i_addr, + uint32_t & io_tmp ) +{ + #ifdef __HOSTBOOT_MODULE + + // Bank is stored as MBA "(DDR4): bg1-bg0,b1-b0 (4-bit)" in a MemAddr. + // bank group - 2 bits (bg1-bg0) + io_tmp = ( io_tmp << 2 ) | ( (i_addr.getBank() >> 2) & 0x03 ); + + // bank - 3 bits (b2-b0) + io_tmp = ( io_tmp << 3 ) | ( i_addr.getBank() & 0x03 ); + + // Row is stored as "MBA: r17-r0 (18-bit)" in a MemAddr. 
+ // row - 18 bits (r17-r0) + io_tmp = ( io_tmp << 18 ) | ( i_addr.getRow() & 0x0003ffff ); + + #endif // __HOSTBOOT_MODULE +} //------------------------------------------------------------------------------ @@ -297,7 +324,7 @@ uint32_t setRowRepairData( TargetHandle_t i_dimm, MemRowRepair l_rowRepair( i_dimm, i_rank, l_data ); - o_rc = setRowRepairData<TYPE_MBA>( i_dimm, i_rank, l_rowRepair ); + o_rc = setRowRepairData<T>( i_dimm, i_rank, l_rowRepair ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setRowRepairData() failed" ); @@ -323,6 +350,11 @@ uint32_t setRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, const MemRank & i_rank, const MemAddr & i_addr, uint8_t i_dram ); +template +uint32_t setRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, + const MemAddr & i_addr, + uint8_t i_dram ); //------------------------------------------------------------------------------ @@ -362,6 +394,9 @@ uint32_t clearRowRepairData<TYPE_MBA>( TargetHandle_t i_dimm, template uint32_t clearRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, const MemRank & i_rank ); +template +uint32_t clearRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank ); //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C index 561c11dda..d58d6a177 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C @@ -53,7 +53,7 @@ MemSymbol::MemSymbol( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, PRDF_ASSERT( nullptr != i_trgt ); PRDF_ASSERT( TYPE_MBA == getTargetType(i_trgt) || TYPE_MCA == getTargetType(i_trgt) || - TYPE_MEM_PORT == getTargetType(i_trgt) ); + TYPE_OCMB_CHIP == getTargetType(i_trgt) ); // Allowing an invalid symbol. Use isValid() to check validity. 
PRDF_ASSERT( i_pins <= CEN_SYMBOL::BOTH_SYMBOL_DQS ); } @@ -83,9 +83,9 @@ MemSymbol MemSymbol::fromGalois( TargetHandle_t i_trgt, const MemRank & i_rank, if ( 0 != (i_mask & 0xaa) ) pins |= EVEN_SYMBOL_DQ; if ( 0 != (i_mask & 0x55) ) pins |= ODD_SYMBOL_DQ; } - else if ( TYPE_MCA == trgtType || TYPE_MEM_PORT == trgtType ) + else if ( TYPE_MCA == trgtType || TYPE_OCMB_CHIP == trgtType ) { - // 1 pin for MCA/MEM_PORT. + // 1 pin for MCA/TYPE_OCMB_CHIP. if ( 0 != (i_mask & 0xff) ) pins |= ODD_SYMBOL_DQ; } else @@ -112,9 +112,9 @@ uint8_t MemSymbol::getDq() const { dq = symbol2Dq<TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dq = symbol2Dq<TYPE_MEM_PORT>( iv_symbol ); + dq = symbol2Dq<TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -140,9 +140,9 @@ uint8_t MemSymbol::getPortSlct() const { portSlct = symbol2PortSlct<TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - portSlct = symbol2PortSlct<TYPE_MEM_PORT>( iv_symbol ); + portSlct = symbol2PortSlct<TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -159,22 +159,26 @@ uint8_t MemSymbol::getDram() const { uint8_t dram = 0; TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; if ( TYPE_MBA == trgtType ) { + isX4 = isDramWidthX4( iv_trgt ); dram = isX4 ? symbol2Nibble<TYPE_MBA>( iv_symbol ) : symbol2Byte <TYPE_MBA>( iv_symbol ); } else if ( TYPE_MCA == trgtType ) { + isX4 = isDramWidthX4( iv_trgt ); dram = isX4 ? symbol2Nibble<TYPE_MCA>( iv_symbol ) : symbol2Byte <TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dram = isX4 ? symbol2Nibble<TYPE_MEM_PORT>( iv_symbol ) - : symbol2Byte <TYPE_MEM_PORT>( iv_symbol ); + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + dram = isX4 ? 
symbol2Nibble<TYPE_OCMB_CHIP>( iv_symbol ) + : symbol2Byte <TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -200,14 +204,24 @@ uint8_t MemSymbol::getDramRelCenDqs() const const uint8_t X4_DRAM_SPARE_UPPER = 19; const uint8_t X8_DRAM_SPARE = 9; + bool isX4 = true; + if ( TYPE_OCMB_CHIP == getTargetType(iv_trgt) ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } - uint8_t l_dramWidth = ( isDramWidthX4(iv_trgt) ) ? 4 : 8; + uint8_t l_dramWidth = ( isX4 ) ? 4 : 8; uint8_t l_dram = getDq() / l_dramWidth; // (x8: 0-9, x4: 0-19) // Adjust for spares if ( isDramSpared() ) { - if ( isDramWidthX4(iv_trgt) ) + if ( isX4 ) { uint8_t l_bit = getDq() % DQS_PER_BYTE; l_dram = ( l_bit < 4 ) ? X4_DRAM_SPARE_LOWER : X4_DRAM_SPARE_UPPER; @@ -219,7 +233,7 @@ uint8_t MemSymbol::getDramRelCenDqs() const } else if ( isEccSpared() ) { - l_dram = ( isDramWidthX4(iv_trgt) ) ? X4_ECC_SPARE : X8_ECC_SPARE; + l_dram = ( isX4 ) ? X4_ECC_SPARE : X8_ECC_SPARE; } return l_dram; @@ -231,7 +245,16 @@ uint8_t MemSymbol::getDramRelCenDqs() const uint8_t MemSymbol::getDramPins() const { TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; + if ( TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } uint32_t dps = 0; uint32_t spd = 0; @@ -241,7 +264,7 @@ uint8_t MemSymbol::getDramPins() const dps = MBA_DQS_PER_SYMBOL; spd = isX4 ? MBA_SYMBOLS_PER_NIBBLE : MBA_SYMBOLS_PER_BYTE; } - else if ( TYPE_MCA == trgtType || TYPE_MEM_PORT == trgtType ) + else if ( TYPE_MCA == trgtType || TYPE_OCMB_CHIP == trgtType ) { dps = MEM_DQS_PER_SYMBOL; spd = isX4 ? 
MEM_SYMBOLS_PER_NIBBLE : MEM_SYMBOLS_PER_BYTE; @@ -261,7 +284,16 @@ uint8_t MemSymbol::getDramSymbol() const { uint8_t dramSymbol = SYMBOLS_PER_RANK; TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; + if ( TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } uint8_t dram = getDram(); if ( TYPE_MBA == trgtType ) @@ -274,10 +306,10 @@ uint8_t MemSymbol::getDramSymbol() const dramSymbol = isX4 ? nibble2Symbol<TYPE_MCA>( dram ) : byte2Symbol <TYPE_MCA>( dram ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dramSymbol = isX4 ? nibble2Symbol<TYPE_MEM_PORT>( dram ) - : byte2Symbol <TYPE_MEM_PORT>( dram ); + dramSymbol = isX4 ? nibble2Symbol<TYPE_OCMB_CHIP>( dram ) + : byte2Symbol <TYPE_OCMB_CHIP>( dram ); } else { @@ -435,16 +467,16 @@ uint32_t getMemReadSymbol<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MemSymbol & o_sym1, - MemSymbol & o_sym2 ) +uint32_t getMemReadSymbol<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemSymbol & o_sym1, + MemSymbol & o_sym2 ) { - #define PRDF_FUNC "[getMemReadSymbol<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[getMemReadSymbol<TYPE_OCMB_CHIP>] " // Check parameters PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -453,14 +485,12 @@ uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, do { // Get the NCE/TCE galois and mask from hardware. 
- ExtensibleChip * ocmbChip = getConnectedParent(i_chip, TYPE_OCMB_CHIP); - - SCAN_COMM_REGISTER_CLASS * reg = ocmbChip->getRegister("MBSEVR0"); + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister("MBSEVR0"); o_rc = reg->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on MBSEVR0: " - "ocmbChip=0x%08x", ocmbChip->getHuid() ); + "i_chip=0x%08x", i_chip->getHuid() ); break; } @@ -480,8 +510,8 @@ uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, tceGalois, tceMask ); MemSymbol sp0, sp1, ecc; - o_rc = mssGetSteerMux<TYPE_MEM_PORT>( i_chip->getTrgt(), i_rank, - sp0, sp1, ecc ); + o_rc = mssGetSteerMux<TYPE_OCMB_CHIP>( i_chip->getTrgt(), i_rank, + sp0, sp1, ecc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed. HUID: 0x%08x " diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H index c16972fd8..00b0c7cfd 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H @@ -79,7 +79,7 @@ class MemSymbol /** * @brief Creates a MemSymbol from a symbol. - * @param i_trgt MBA, MCA, or MEM_PORT target. + * @param i_trgt MBA, MCA, or OCMB_CHIP target. * @param i_rank The rank this symbol is on. * @param i_symbol The input symbol. * @param i_pins See enum DqMask. @@ -95,7 +95,7 @@ class MemSymbol /** * @brief Creates a MemSymbol from a Galois field. - * @param i_trgt MBA, MCA, or MEM_PORT target. + * @param i_trgt MBA, MCA, or OCMB_CHIP target. * @param i_rank The rank this symbol is on. * @param i_galois The Galois field. * @param i_mask The bit mask. @@ -122,7 +122,7 @@ class MemSymbol MemRank getRank() const { return iv_rank; }; /** @return The port select for this symbol. Only relevant on MBA. Will - * always return 0 for MCA and MEM_PORT. */ + * always return 0 for MCA and OCMB. */ uint8_t getPortSlct() const; /** @return The DRAM index for this symbol. 
*/ @@ -218,7 +218,7 @@ class MemSymbol /** * @brief Reads the memory NCE/TCE vector trap register from hardware. - * @param i_chip MCA, MBA, or MEM_PORT. + * @param i_chip MCA, MBA, or OCMB_CHIP. * @param i_rank The rank this symbol is on. * @param o_sym1 The first symbol. Should always be valid for both NCE/TCE. * @param o_sym2 The second symbol. Only valid for TCEs. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C index f6403f219..f9c73b739 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C @@ -173,7 +173,8 @@ void getMnfgMemCeTh( ExtensibleChip * i_chip, const MemRank & i_rank, else { // Get DRAM size - uint8_t size = MemUtils::getDramSize<T>( i_chip, i_rank.getDimmSlct() ); + uint8_t size = MemUtils::getDramSize<T>( i_chip->getTrgt(), + i_rank.getDimmSlct() ); // Get number of ranks per DIMM select. uint8_t rankCount = getNumRanksPerDimm<T>( i_chip->getTrgt(), @@ -209,7 +210,7 @@ void getMnfgMemCeTh<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, uint32_t & o_cePerDimm ); template -void getMnfgMemCeTh<TYPE_MEM_PORT>( ExtensibleChip * i_chip, +void getMnfgMemCeTh<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, const MemRank & i_rank, uint32_t & o_cePerDram, uint32_t & o_cePerRank, uint32_t & o_cePerDimm ); @@ -236,14 +237,8 @@ uint32_t getScrubCeThreshold( ExtensibleChip * i_chip, const MemRank & i_rank ) // need these templates to avoid linker errors template -uint32_t getScrubCeThreshold<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ); -template uint32_t getScrubCeThreshold<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank ); -template -uint32_t getScrubCeThreshold<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank ); } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C index 
744e55e69..64677f1ae 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2019 */ +/* Contributors Listed Below - COPYRIGHT 2013,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -31,11 +31,14 @@ // Framework includes #include <iipServiceDataCollector.h> +#include <iipSystem.h> #include <prdfExtensibleChip.H> +#include <prdfGlobal_common.H> #include <UtilHash.H> // Platform includes #include <prdfCenMbaDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfCenMembufDataBundle.H> #include <prdfCenMembufExtraSig.H> #include <prdfMemSymbol.H> @@ -224,12 +227,12 @@ int32_t collectCeStats<TYPE_MCA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MaintSymbols & o_maintStats, - MemSymbol & o_chipMark, uint8_t i_thr ) +int32_t collectCeStats<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MaintSymbols & o_maintStats, + MemSymbol & o_chipMark, uint8_t i_thr ) { - #define PRDF_FUNC "[MemUtils::collectCeStats<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[MemUtils::collectCeStats<TYPE_OCMB_CHIP>] " int32_t o_rc = SUCCESS; o_chipMark = MemSymbol(); // Initially invalid. 
@@ -238,10 +241,13 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, { PRDF_ASSERT( 0 != i_thr ); - TargetHandle_t memPortTrgt = i_chip->getTrgt(); - ExtensibleChip * ocmbChip = getConnectedParent(i_chip, TYPE_OCMB_CHIP); + TargetHandle_t ocmbTrgt = i_chip->getTrgt(); - const bool isX4 = isDramWidthX4(memPortTrgt); + // TODO RTC 210072 - support for multiple ports + TargetHandle_t memPortTrgt = getConnectedChild( ocmbTrgt, + TYPE_MEM_PORT, 0 ); + TargetHandle_t dimm = getConnectedDimm( memPortTrgt, i_rank ); + const bool isX4 = isDramWidthX4( dimm ); // Use this map to keep track of the total counts per DRAM. DramCountMap dramCounts; @@ -252,7 +258,7 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_PORT; regIdx++ ) { reg_str = ocmbCeStatReg[regIdx]; - reg = ocmbChip->getRegister( reg_str ); + reg = i_chip->getRegister( reg_str ); o_rc = reg->Read(); if ( SUCCESS != o_rc ) @@ -272,8 +278,8 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, uint8_t sym = baseSymbol + i; PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); - uint8_t dram = isX4 ? symbol2Nibble<TYPE_MEM_PORT>( sym ) - : symbol2Byte <TYPE_MEM_PORT>( sym ); + uint8_t dram = isX4 ? symbol2Nibble<TYPE_OCMB_CHIP>( sym ) + : symbol2Byte <TYPE_OCMB_CHIP>( sym ); // Keep track of the total DRAM counts. dramCounts[dram].totalCount += count; @@ -286,7 +292,7 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, dramCounts[dram].symbolCount++; SymbolData symData; - symData.symbol = MemSymbol::fromSymbol( memPortTrgt, i_rank, + symData.symbol = MemSymbol::fromSymbol( ocmbTrgt, i_rank, sym, CEN_SYMBOL::ODD_SYMBOL_DQ ); if ( !symData.symbol.isValid() ) { @@ -329,11 +335,11 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, if ( 0 != highestCount ) { - uint8_t sym = isX4 ? nibble2Symbol<TYPE_MEM_PORT>( highestDram ) - : byte2Symbol <TYPE_MEM_PORT>( highestDram ); + uint8_t sym = isX4 ? 
nibble2Symbol<TYPE_OCMB_CHIP>( highestDram ) + : byte2Symbol <TYPE_OCMB_CHIP>( highestDram ); PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); - o_chipMark = MemSymbol::fromSymbol( memPortTrgt, i_rank, sym ); + o_chipMark = MemSymbol::fromSymbol( ocmbTrgt, i_rank, sym ); } } while(0); @@ -514,19 +520,18 @@ int32_t collectCeStats<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint8_t getDramSize<TYPE_MCA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( TYPE_MCA == getTargetType(i_trgt) ); PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); - TargetHandle_t mcaTrgt = i_chip->getTrgt(); - TargetHandle_t mcsTrgt = getConnectedParent( mcaTrgt, TYPE_MCS ); + TargetHandle_t mcsTrgt = getConnectedParent( i_trgt, TYPE_MCS ); PRDF_ASSERT( nullptr != mcsTrgt ); - uint8_t mcaRelPos = i_chip->getPos() % MAX_MCA_PER_MCS; + uint8_t mcaRelPos = getTargetPosition(i_trgt) % MAX_MCA_PER_MCS; uint8_t tmp[MAX_MCA_PER_MCS][DIMM_SLCT_PER_PORT]; @@ -542,19 +547,22 @@ uint8_t getDramSize<TYPE_MCA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) } template<> -uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + PRDF_ASSERT( TYPE_MBA == getTargetType(i_trgt) ); uint8_t o_size = 0; do { - ExtensibleChip * membufChip = getConnectedParent(i_chip, TYPE_MEMBUF); + TargetHandle_t membuf = getConnectedParent(i_trgt, TYPE_MEMBUF); + ExtensibleChip * membufChip = + (ExtensibleChip*)systemPtr->GetChip(membuf); + PRDF_ASSERT( nullptr != membufChip ); - uint32_t pos = i_chip->getPos(); + uint32_t pos = getTargetPosition(i_trgt); const char * reg_str = (0 == pos) ? 
"MBA0_MBAXCR" : "MBA1_MBAXCR"; SCAN_COMM_REGISTER_CLASS * reg = membufChip->getRegister( reg_str ); @@ -562,7 +570,7 @@ uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) if ( SUCCESS != rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on %s. Target=0x%08x", - reg_str, i_chip->getHuid() ); + reg_str, getHuid(i_trgt) ); break; } @@ -579,18 +587,16 @@ uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) } template<> -uint8_t getDramSize<TYPE_MEM_PORT>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MEM_PORT>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_MEM_PORT == getTargetType(i_trgt) ); PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); - TargetHandle_t memPortTrgt = i_chip->getTrgt(); - uint8_t tmp[DIMM_SLCT_PER_PORT]; - if ( !memPortTrgt->tryGetAttr<TARGETING::ATTR_MEM_EFF_DRAM_DENSITY>(tmp) ) + if ( !i_trgt->tryGetAttr<TARGETING::ATTR_MEM_EFF_DRAM_DENSITY>(tmp) ) { PRDF_ERR( PRDF_FUNC "Failed to get ATTR_MEM_EFF_DRAM_DENSITY" ); PRDF_ASSERT( false ); @@ -601,6 +607,25 @@ uint8_t getDramSize<TYPE_MEM_PORT>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) #undef PRDF_FUNC } +template<> +uint8_t getDramSize<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) +{ + #define PRDF_FUNC "[MemUtils::getDramSize] " + + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_trgt) ); + PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); + + // TODO RTC 210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. 
+ + TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + + return getDramSize<TYPE_MEM_PORT>( memPort, i_dimmSlct ); + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<> @@ -639,6 +664,34 @@ void cleanupChnlAttns<TYPE_MEMBUF>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +void cleanupChnlAttns<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::cleanupChnlAttns] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + // No cleanup if this is a checkstop attention. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) return; + + #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP + + // Clear the associated FIR bits for all attention types. DSTLFIR[0:7] + ExtensibleChip * mcc = getConnectedParent( i_chip, TYPE_MCC ); + + SCAN_COMM_REGISTER_CLASS * reg = mcc->getRegister( "DSTLFIR_AND" ); + + reg->setAllBits(); + reg->SetBitFieldJustified( 0, 8, 0 ); + reg->Write(); + + #endif // Hostboot only + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1288,6 +1341,361 @@ bool analyzeChnlFail<TYPE_MC>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ +bool __queryUcsOmic( ExtensibleChip * i_omic, ExtensibleChip * i_mcc, + TargetHandle_t i_omi ) +{ + PRDF_ASSERT( nullptr != i_omic ); + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMIC == i_omic->getType() ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + bool o_activeAttn = false; + + do + { + // Get the DSTLCFG2 register to check whether channel fail is enabled + // NOTE: DSTLCFG2[22] = 0b0 to enable chnl fail for subchannel A + // NOTE: DSTLCFG2[23] = 0b0 to 
enable chnl fail for subchannel B + SCAN_COMM_REGISTER_CLASS * cnfg = i_mcc->getRegister( "DSTLCFG2" ); + + // Get the position of the inputted OMI relative to the parent MCC (0-1) + // to determine which channel we need to check. + uint8_t omiPosRelMcc = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // If channel fail isn't configured, no need to continue. + if ( cnfg->IsBitSet(22 + omiPosRelMcc) ) break; + + // Check the OMIDLFIR for UCS (relevant bits: 0,20,40) + SCAN_COMM_REGISTER_CLASS * fir = i_omic->getRegister("OMIDLFIR"); + SCAN_COMM_REGISTER_CLASS * mask = i_omic->getRegister("OMIDLFIR_MASK"); + SCAN_COMM_REGISTER_CLASS * act0 = i_omic->getRegister("OMIDLFIR_ACT0"); + SCAN_COMM_REGISTER_CLASS * act1 = i_omic->getRegister("OMIDLFIR_ACT1"); + + if ( SUCCESS == ( fir->Read() | mask->Read() | + act0->Read() | act1->Read() ) ) + { + // Get the position of the inputted OMI relative to the parent + // OMIC (0-2). We'll need to use ATTR_OMI_DL_GROUP_POS for this. + uint8_t omiPosRelOmic = i_omi->getAttr<ATTR_OMI_DL_GROUP_POS>(); + + // Get the bit offset for the bit relevant to the inputted OMI. + // 0 : OMI-DL 0 + // 20: OMI-DL 1 + // 40: OMI-DL 2 + uint8_t bitOff = omiPosRelOmic * 20; + + // Check if there is a UNIT_CS for the relevant bits in the OMIDLFIR + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of those errors. 
+ if ( fir->IsBitSet(bitOff) && !mask->IsBitSet(bitOff) && + !act0->IsBitSet(bitOff) && act1->IsBitSet(bitOff) ) + { + o_activeAttn = true; + } + } + }while(0); + + return o_activeAttn; +} + +bool __queryUcsMcc( ExtensibleChip * i_mcc, TargetHandle_t i_omi ) +{ + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + bool o_activeAttn = false; + + // Get the position of the inputted OMI relative to the parent MCC (0-1) + // to determine which channel we need to check. + uint8_t omiPos = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // Maps of the DSTLFIR UCS bits to their relevant channel fail + // configuration bit in DSTLCFG2. Ex: {12,28} = DSTLFIR[12], DSTLCFG2[28] + // NOTE: there is a separate map for each subchannel. + const std::map<uint8_t,uint8_t> dstlfirMapChanA = + { {12,28}, {16,30}, {22,24} }; + + const std::map<uint8_t,uint8_t> dstlfirMapChanB = + { {13,29}, {17,31}, {23,25} }; + + // Check the DSTLFIR for UCS + SCAN_COMM_REGISTER_CLASS * fir = i_mcc->getRegister( "DSTLFIR" ); + SCAN_COMM_REGISTER_CLASS * mask = i_mcc->getRegister( "DSTLFIR_MASK" ); + SCAN_COMM_REGISTER_CLASS * act0 = i_mcc->getRegister( "DSTLFIR_ACT0" ); + SCAN_COMM_REGISTER_CLASS * act1 = i_mcc->getRegister( "DSTLFIR_ACT1" ); + SCAN_COMM_REGISTER_CLASS * cnfg = i_mcc->getRegister( "DSTLCFG2" ); + + if ( SUCCESS == (fir->Read() | mask->Read() | act0->Read() | act1->Read() | + cnfg->Read()) ) + { + // Get which relevant channel we need to check. + std::map<uint8_t,uint8_t> dstlfirMap; + dstlfirMap = (0 == omiPos) ? 
dstlfirMapChanA : dstlfirMapChanB; + + for ( auto const & bits : dstlfirMap ) + { + uint8_t firBit = bits.first; + uint8_t cnfgBit = bits.second; + + // NOTE: Channel fail is enabled if the config bit is set to 0b0 + if ( !cnfg->IsBitSet(cnfgBit) && fir->IsBitSet(firBit) && + !mask->IsBitSet(firBit) && act0->IsBitSet(firBit) && + act1->IsBitSet(firBit) ) + { + o_activeAttn = true; + } + } + } + + // Maps of the USTLFIR UCS bits to their relevant channel fail + // config bit in USTLFAILMASK. Ex: {0,54} = USTLFIR[0], USTLFAILMASK[54] + // NOTE: there is a separate map for each subchannel. + const std::map<uint8_t,uint8_t> ustlfirMapChanA = + { { 0,54}, { 2,48}, {27,56}, {35,49}, {37,50}, {39,51}, {41,52}, {43,53}, + {49,55}, {51,50}, {53,50}, {55,48}, {59,56} }; + const std::map<uint8_t,uint8_t> ustlfirMapChanB = + { { 1,54}, { 3,48}, {28,56}, {36,49}, {38,50}, {40,51}, {42,52}, {44,53}, + {50,55}, {52,50}, {54,50}, {56,48}, {60,56} }; + + // Check the USTLFIR for UCS + fir = i_mcc->getRegister( "USTLFIR" ); + mask = i_mcc->getRegister( "USTLFIR_MASK" ); + act0 = i_mcc->getRegister( "USTLFIR_ACT0" ); + act1 = i_mcc->getRegister( "USTLFIR_ACT1" ); + cnfg = i_mcc->getRegister( "USTLFAILMASK" ); + + if ( SUCCESS == (fir->Read() | mask->Read() | act0->Read() | act1->Read() | + cnfg->Read()) ) + { + // Get which relevant channel we need to check. + std::map<uint8_t,uint8_t> ustlfirMap; + ustlfirMap = (0 == omiPos) ? 
ustlfirMapChanA : ustlfirMapChanB; + + for ( auto const & bits : ustlfirMap ) + { + uint8_t firBit = bits.first; + uint8_t cnfgBit = bits.second; + + // NOTE: Channel fail is enabled if the config bit is set to 0b0 + if ( !cnfg->IsBitSet(cnfgBit) && fir->IsBitSet(firBit) && + !mask->IsBitSet(firBit) && act0->IsBitSet(firBit) && + act1->IsBitSet(firBit) ) + { + o_activeAttn = true; + } + } + } + + return o_activeAttn; +} + +bool __queryUcsOcmb( ExtensibleChip * i_ocmb ) +{ + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); + + bool o_activeAttn = false; + + // We can't use the GLOBAL_CS_FIR. It will not clear automatically when a + // channel has failed because the hardware clocks have stopped. Also, since + // it is a virtual register there really is no way to clear it. Fortunately + // we have the INTER_STATUS_REG that will tell us if there is an active + // attention. Note that we clear this register as part of the channel + // failure cleanup. So we can rely on this register to determine if there is + // a new channel failure. + + SCAN_COMM_REGISTER_CLASS * fir = i_ocmb->getRegister("INTER_STATUS_REG"); + + if ( SUCCESS == fir->Read() ) + { + o_activeAttn = fir->IsBitSet(2); // Checkstop bit. + } + + return o_activeAttn; +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +bool __analyzeChnlFail( TargetHandle_t i_trgt, + STEP_CODE_DATA_STRUCT & io_sc ); + +template<> +bool __analyzeChnlFail<TYPE_OMI>( TargetHandle_t i_omi, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::__analyzeChnlFail<TYPE_OMI>] " + + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + uint32_t o_analyzed = false; + + do + { + // Skip if currently analyzing a host attention. This is required for + // a rare scenario when a channel failure occurs after PRD is called to + // handle the host attention. 
+ if ( HOST_ATTN == io_sc.service_data->getPrimaryAttnType() ) break; + + // Get the needed ExtensibleChips for analysis + TargetHandle_t ocmb = getConnectedChild( i_omi, TYPE_OCMB_CHIP, 0 ); + ExtensibleChip * ocmbChip = (ExtensibleChip *)systemPtr->GetChip(ocmb); + + TargetHandle_t omic = getConnectedParent( i_omi, TYPE_OMIC ); + ExtensibleChip * omicChip = (ExtensibleChip *)systemPtr->GetChip(omic); + + TargetHandle_t mcc = getConnectedParent( i_omi, TYPE_MCC ); + ExtensibleChip * mccChip = (ExtensibleChip *)systemPtr->GetChip(mcc); + + // Do an initial query for channel fail attentions from the targets. + // This is to check whether we actually have an active channel fail + // attention before checking whether it is a side effect of some + // recoverable attention or not. + if ( !__queryUcsOmic(omicChip, mccChip, i_omi) && + !__queryUcsMcc(mccChip, i_omi) && + !__queryUcsOcmb(ocmbChip) ) + { + // If no channel fail attentions found, just break out. + break; + } + + // There was a channel fail found, so take the following actions. + + // Set the MEM_CHNL_FAIL flag in the SDC to indicate a channel failure + // has been detected and there is no need to check again. + io_sc.service_data->setMemChnlFail(); + + // Make the error log predictive and set threshold. + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + + // Channel failures will always send SUEs. + io_sc.service_data->setFlag( ServiceDataCollector::UERE ); + + // Indicate cleanup is required on this channel. + getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup = true; + + // Check for recoverable attentions that could have a channel failure + // as a side effect. 
These include: N/A + // TODO RTC 243518 -requires more input from the test team to determine + + // Check OMIC for unit checkstops + if ( __queryUcsOmic( omicChip, mccChip, i_omi ) ) + { + // Analyze UNIT_CS on the OMIC chip + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of those errors. + if ( SUCCESS == omicChip->Analyze(io_sc, RECOVERABLE) ) + { + o_analyzed = true; + break; + } + } + + // Check MCC for unit checkstops + if ( __queryUcsMcc( mccChip, i_omi ) ) + { + // Analyze UNIT_CS on the MCC chip + if ( SUCCESS == mccChip->Analyze(io_sc, UNIT_CS) ) + { + o_analyzed = true; + break; + } + } + + // Check OCMB for unit checkstops + if ( __queryUcsOcmb( ocmbChip ) ) + { + // Analyze UNIT_CS on the OCMB chip + if ( SUCCESS == ocmbChip->Analyze(io_sc, UNIT_CS) ) + { + o_analyzed = true; + break; + } + + } + PRDF_INF( PRDF_FUNC "Failed channel detected on 0x%08x, but no active " + "attentions found", getHuid(i_omi) ); + }while(0); + + return o_analyzed; + + #undef PRDF_FUNC +} + +template<> +bool analyzeChnlFail<TYPE_MCC>( ExtensibleChip * i_mcc, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + + uint32_t o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + // Loop through all the connected OMIs + for ( auto & omi : getConnected(i_mcc->getTrgt(), TYPE_OMI) ) + { + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + if ( o_analyzed ) break; + } + } + + return o_analyzed; +} + +template<> +bool analyzeChnlFail<TYPE_OMIC>( ExtensibleChip * i_omic, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_omic ); + PRDF_ASSERT( TYPE_OMIC == i_omic->getType() ); + + uint32_t o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + // Loop through 
all the connected OMIs + for ( auto & omi : getConnected(i_omic->getTrgt(), TYPE_OMI) ) + { + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + if ( o_analyzed ) break; + } + } + + return o_analyzed; +} + +template<> +bool analyzeChnlFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmb, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); + + uint32_t o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + TargetHandle_t omi = getConnectedParent( i_ocmb->getTrgt(), TYPE_OMI ); + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + } + + return o_analyzed; +} + +//------------------------------------------------------------------------------ + template<TARGETING::TYPE T1, TARGETING::TYPE T2, TARGETING::TYPE T3> void __cleanupChnlFail( ExtensibleChip * i_chip1, ExtensibleChip * i_chip2, ExtensibleChip * i_chip3, @@ -1415,6 +1823,158 @@ void cleanupChnlFail<TYPE_MEMBUF>( ExtensibleChip * i_chip, cleanupChnlFail<TYPE_DMI>( dmiChip, io_sc ); } +template<TARGETING::TYPE T> +void __cleanupChnlFail( TargetHandle_t i_trgt, STEP_CODE_DATA_STRUCT & io_sc ); + +template<> +void __cleanupChnlFail<TYPE_OMI>( TargetHandle_t i_omi, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::__cleanupChnlFail] " + + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + do + { + // No cleanup if this is a checkstop attention. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) break; + + TargetHandle_t ocmb = getConnectedChild(i_omi, TYPE_OCMB_CHIP, 0); + ExtensibleChip * ocmbChip = (ExtensibleChip *)systemPtr->GetChip(ocmb); + + // Check if cleanup is still required or has already been done. + if ( !getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup ) break; + + // Cleanup is complete and no longer required on this channel. 
+ getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup = false; + + #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP + + TargetHandle_t omic = getConnectedParent( i_omi, TYPE_OMIC ); + ExtensibleChip * omicChip = (ExtensibleChip *)systemPtr->GetChip(omic); + + TargetHandle_t mcc = getConnectedParent( i_omi, TYPE_MCC ); + ExtensibleChip * mccChip = (ExtensibleChip *)systemPtr->GetChip(mcc); + + // Get the OMI position relative to the OMIC (0,1,2) and the MCC (0,1) + uint8_t omiPosRelOmic = i_omi->getAttr<ATTR_OMI_DL_GROUP_POS>(); + uint8_t omiPosRelMcc = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // Note that this is a clean up function. If there are any SCOM errors + // we will just move on and try the rest. + SCAN_COMM_REGISTER_CLASS * reg = nullptr; + + // Mask off attentions from the OMIDLFIR in the OMIC based on the + // OMI position. 0-19, 20-39, 40-59 + reg = omicChip->getRegister( "OMIDLFIR_MASK_OR" ); + reg->SetBitFieldJustified( (omiPosRelOmic * 20), 20, 0xfffff ); + reg->Write(); + + // Mask off attentions from the DSTLFIR and USTLFIR in the MCC based on + // the OMI position. 
+ // DSTLFIR Generic Bits: 8,9,10,11,24,25,26,27 + uint64_t mask = 0x00f000f000000000ull; + if ( 0 == omiPosRelMcc ) + { + // DSTLFIR Subchannel A Bits: 0,1,2,3,12,14,16,18,20,22 + mask |= 0xf00aaa0000000000ull; + } + else + { + // DSTLFIR Subchannel B Bits: 4,5,6,7,13,15,17,19,21,23 + mask |= 0x0f05550000000000ull; + } + reg = mccChip->getRegister( "DSTLFIR_MASK_OR" ); + reg->SetBitFieldJustified( 0, 64, mask ); + reg->Write(); + + // USTLFIR Generic Bits: 6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21, + // 22,23,24,25,26,57,58,61,62,63 + mask = 0x03ffffe000000067ull; + if ( 0 == omiPosRelMcc ) + { + // USTLFIR Subchannel A Bits: 0,2,4,27,29,31,33,35,37,39,41,43,45, + // 47,49,51,53,55,59 + mask |= 0xa800001555555510ull; + } + else + { + // USTLFIR Subchannel B Bits: 1,3,5,28,30,32,34,36,38,40,42,44,46, + // 48,50,52,54,56,60 + mask |= 0x5400000aaaaaaa88ull; + } + reg = mccChip->getRegister( "USTLFIR_MASK_OR" ); + reg->SetBitFieldJustified( 0, 64, mask ); + reg->Write(); + + // Mask off all attentions from the chiplet FIRs in the OCMB + reg = ocmbChip->getRegister( "OCMB_CHIPLET_FIR_MASK" ); + reg->setAllBits(); // Blindly mask everything + reg->Write(); + + + // To ensure FSP ATTN doesn't think there is an active attention on this + // OCMB, manually clear the interrupt status register. + reg = ocmbChip->getRegister( "INTER_STATUS_REG" ); + reg->clearAllBits(); // Blindly clear everything + reg->Write(); + + // During runtime, send a dynamic memory deallocation message. + // During Memory Diagnostics, tell MDIA to stop pattern tests. 
+ #ifdef __HOSTBOOT_RUNTIME + MemDealloc::port<TYPE_OCMB_CHIP>( ocmbChip ); + #else + if ( isInMdiaMode() ) + { + mdiaSendEventMsg( ocmb, MDIA::STOP_TESTING ); + } + #endif + + #endif // Hostboot only + + }while(0); + + #undef PRDF_FUNC +} + +template<> +void cleanupChnlFail<TYPE_MCC>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MCC == i_chip->getType() ); + + for ( auto & omi : getConnected(i_chip->getTrgt(), TYPE_OMI) ) + { + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); + } +} + +template<> +void cleanupChnlFail<TYPE_OMIC>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OMIC == i_chip->getType() ); + + for ( auto & omi : getConnected(i_chip->getTrgt(), TYPE_OMI) ) + { + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); + } +} + +template<> +void cleanupChnlFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + TargetHandle_t omi = getConnectedParent( i_chip->getTrgt(), TYPE_OMI ); + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); +} + //------------------------------------------------------------------------------ uint64_t reverseBits( uint64_t i_val, uint64_t i_numBits ) diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H index 9759cd010..39a6051fe 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H @@ -102,12 +102,12 @@ int32_t collectCeStats( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Gets DRAM size for an MBA, MCA, or MEM_PORT. - * @param i_chip MBA, MCA, or MEM_PORT chip. + * @param i_trgt MBA, MCA, or MEM_PORT target. * @param i_dimmSlct DIMM select. Optional for MBA chip. 
* @return size for a DRAM */ template<TARGETING::TYPE T> -uint8_t getDramSize( ExtensibleChip * i_chip, uint8_t i_dimmSlct = 0 ); +uint8_t getDramSize( TARGETING::TargetHandle_t i_trgt, uint8_t i_dimmSlct = 0 ); /** * @brief determines the type of Centaur based raw card associated with MBA. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C b/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C index bb911847e..4cd596514 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2018 */ +/* Contributors Listed Below - COPYRIGHT 2013,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -70,42 +70,78 @@ MemoryMru::MemoryMru( uint32_t i_memMru ) : PRDF_ASSERT( false ); } - // If our target is MBA, get the chnlPos from the membuf - if ( 0 == iv_memMruMeld.s.isMca ) + // If our target is MCA + if ( 1 == iv_memMruMeld.s.isMca ) { - TargetHandle_t membuf = getConnectedChild( proc, TYPE_MEMBUF, + iv_target = getConnectedChild( proc, TYPE_MCA, iv_memMruMeld.s.chnlPos ); - if ( NULL == membuf ) + if ( NULL == iv_target ) { - PRDF_ERR( PRDF_FUNC "Could not find functional membuf " + PRDF_ERR( PRDF_FUNC "Could not find functional mca " "attached to proc 0x%08X at pos: %u", getHuid( proc ), iv_memMruMeld.s.chnlPos ); PRDF_ASSERT( false ); } + } + // If our target is OCMB + else if ( 1 == iv_memMruMeld.s.isOcmb ) + { + // chnlPos specifies the position of the MCC relative to the proc + TargetHandle_t mcc = getConnectedChild( proc, TYPE_MCC, + iv_memMruMeld.s.chnlPos ); + if ( nullptr == mcc ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional mcc attached to " + "proc 0x%08x at pos: %u", getHuid(proc), + iv_memMruMeld.s.chnlPos ); + PRDF_ASSERT( false ); + } - iv_target = getConnectedChild( membuf, TYPE_MBA, - iv_memMruMeld.s.mbaPos ); - if ( NULL == iv_target ) + // mbaPos specifies 
the position of the OMI relative to the MCC + TargetHandle_t omi = getConnectedChild( mcc, TYPE_OMI, + iv_memMruMeld.s.mbaPos ); + if ( nullptr == omi ) { - PRDF_ERR( PRDF_FUNC "Could not find functional mba attached " - "to 0x%08X at pos: %u", getHuid( membuf ), - iv_memMruMeld.s.mbaPos ); + PRDF_ERR( PRDF_FUNC "Could not find functional omi attached to " + "mcc 0x%08x at pos: %u", getHuid(mcc), + iv_memMruMeld.s.mbaPos ); + PRDF_ASSERT( false ); + } + + // There is only one OCMB attached per OMI + iv_target = getConnectedChild( omi, TYPE_OCMB_CHIP, 0 ); + if ( nullptr == iv_target ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional ocmb attached to " + "omi 0x%08x", getHuid(mcc) ); PRDF_ASSERT( false ); } + + } + // If our target is MBA, get the chnlPos from the membuf else { - iv_target = getConnectedChild( proc, TYPE_MCA, + TargetHandle_t membuf = getConnectedChild( proc, TYPE_MEMBUF, iv_memMruMeld.s.chnlPos ); - if ( NULL == iv_target ) + if ( nullptr == membuf ) { - PRDF_ERR( PRDF_FUNC "Could not find functional mca " + PRDF_ERR( PRDF_FUNC "Could not find functional membuf " "attached to proc 0x%08X at pos: %u", getHuid( proc ), iv_memMruMeld.s.chnlPos ); PRDF_ASSERT( false ); } - } + iv_target = getConnectedChild( membuf, TYPE_MBA, + iv_memMruMeld.s.mbaPos ); + if ( nullptr == iv_target ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional mba attached " + "to 0x%08X at pos: %u", getHuid( membuf ), + iv_memMruMeld.s.mbaPos ); + PRDF_ASSERT( false ); + } + } // Get the rank iv_rank = MemRank( iv_memMruMeld.s.mrank, iv_memMruMeld.s.srank ); @@ -247,7 +283,8 @@ TargetHandleList MemoryMru::getCalloutList() const } } } - else if ( TARGETING::TYPE_MCA == getTargetType(iv_target) ) + else if ( TARGETING::TYPE_MCA == getTargetType(iv_target) || + TARGETING::TYPE_OCMB_CHIP == getTargetType(iv_target) ) { if ( CALLOUT_ALL_MEM == iv_special ) { @@ -304,6 +341,11 @@ void MemoryMru::getCommonVars() { proc = getConnectedParent( iv_target, TYPE_PROC ); } + else if ( 
TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t mcc = getConnectedParent( iv_target, TYPE_MCC ); + proc = getConnectedParent( mcc, TYPE_PROC ); + } else { PRDF_ERR( PRDF_FUNC "Invalid target type" ); @@ -323,11 +365,27 @@ void MemoryMru::getCommonVars() } // If our target is an MCA, then chnlPos will specify the MCA position // and mbaPos will be an unused field - else + else if ( TYPE_MCA == getTargetType(iv_target) ) { iv_memMruMeld.s.isMca = 1; iv_memMruMeld.s.chnlPos = getTargetPosition( iv_target ); } + // If our target is an OCMB, then chnlPos will specify the MCC position and + // mbaPos will specify the OMI position. + else if ( TYPE_OCMB_CHIP == getTargetType(iv_target) ) + { + TargetHandle_t omi = getConnectedParent( iv_target, TYPE_OMI ); + TargetHandle_t mcc = getConnectedParent( omi, TYPE_MCC ); + + iv_memMruMeld.s.isOcmb = 1; + iv_memMruMeld.s.chnlPos = getTargetPosition(mcc) % MAX_MCC_PER_PROC; + iv_memMruMeld.s.mbaPos = getTargetPosition(omi) % MAX_OMI_PER_MCC; + } + else + { + PRDF_ERR( PRDF_FUNC "Invalid target type" ); + PRDF_ASSERT(false); + } iv_memMruMeld.s.nodePos = getTargetPosition( node ); iv_memMruMeld.s.procPos = getTargetPosition( proc ); diff --git a/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H new file mode 100644 index 000000000..75d7dd53e --- /dev/null +++ b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H @@ -0,0 +1,247 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __prdfOcmbDataBundle_H +#define __prdfOcmbDataBundle_H + +/** @file prdfOcmbDataBundle.H + * @brief Contains the data bundle for a P9 OCMB_CHIP object. + */ + +// Framework includes +#include <prdfExtensibleChip.H> + +// Platform includes +#include <prdfPlatServices.H> +#include <prdfMemCeTable.H> +#include <prdfMemUeTable.H> + +#ifdef __HOSTBOOT_MODULE + +#include <prdfMemScrubUtils.H> +#include <prdfMemTdFalseAlarm.H> +#include <prdfMemThresholds.H> +#include <prdfMemTdCtlr.H> + +#ifndef __HOSTBOOT_RUNTIME +#include <prdfMemIplCeStats.H> +#endif + +#endif // __HOSTBOOT_MODULE + +namespace PRDF +{ + +/** @brief P9 OCMB data bundle. */ +class OcmbDataBundle : public DataBundle +{ + public: // functions + + /** + * @brief Constructor. + * @param i_ocmbChip The OCMB chip. + */ + explicit OcmbDataBundle( ExtensibleChip * i_ocmbChip ) : + iv_chip(i_ocmbChip), iv_ceTable(i_ocmbChip), iv_ueTable(i_ocmbChip) + {} + + /** @brief Destructor. */ + ~OcmbDataBundle() + { + #ifdef __HOSTBOOT_MODULE + #ifdef __HOSTBOOT_RUNTIME + delete iv_vcmFalseAlarmCounter; + delete iv_tpsFalseAlarmCounter; + #else // IPL only + delete iv_iplCeStats; + #endif + delete iv_tdCtlr; iv_tdCtlr = nullptr; + #endif // __HOSTBOOT_MODULE + } + + // Don't allow copy or assignment. + OcmbDataBundle( const OcmbDataBundle & ) = delete; + const OcmbDataBundle & operator=( const OcmbDataBundle & ) = delete; + + #ifdef __HOSTBOOT_MODULE + + /** @return The Targeted Diagnostics controller. 
*/ + MemTdCtlr<TARGETING::TYPE_OCMB_CHIP> * getTdCtlr() + { + if ( nullptr == iv_tdCtlr ) + { + iv_tdCtlr = new MemTdCtlr<TARGETING::TYPE_OCMB_CHIP>{iv_chip}; + } + + return iv_tdCtlr; + } + + /** @return The IMPE threshold counter. */ + VcmFalseAlarm * getImpeThresholdCounter() + { + if ( nullptr == iv_impeThresholdCounter ) + { + iv_impeThresholdCounter = new VcmFalseAlarm( + TimeBasedThreshold { getImpeTh() } ); + } + + return iv_impeThresholdCounter; + } + + #ifdef __HOSTBOOT_RUNTIME + + /** @return The VCM false alarm counter. */ + VcmFalseAlarm * getVcmFalseAlarmCounter() + { + if ( nullptr == iv_vcmFalseAlarmCounter ) + { + iv_vcmFalseAlarmCounter = new VcmFalseAlarm( + TimeBasedThreshold { 4, ThresholdResolution::ONE_DAY } ); + } + + return iv_vcmFalseAlarmCounter; + } + + /** @return The TPS false alarm counter. */ + TpsFalseAlarm * getTpsFalseAlarmCounter() + { + if ( nullptr == iv_tpsFalseAlarmCounter ) + { + iv_tpsFalseAlarmCounter = new TpsFalseAlarm( + TimeBasedThreshold{ 3, ThresholdResolution::ONE_DAY } ); + } + + return iv_tpsFalseAlarmCounter; + } + + #else // IPL only + + /** @return The IPL CE statistics object. */ + MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * getIplCeStats() + { + if ( nullptr == iv_iplCeStats ) + { + iv_iplCeStats = + new MemIplCeStats<TARGETING::TYPE_OCMB_CHIP>( iv_chip ); + } + + return iv_iplCeStats; + } + + #endif + + #endif // __HOSTBOOT_MODULE + + private: // instance variables + + /** The OCMB chip associated with this data bundle. */ + ExtensibleChip * const iv_chip; + + #ifdef __HOSTBOOT_MODULE + + /** The Targeted Diagnostics controller. */ + MemTdCtlr<TARGETING::TYPE_OCMB_CHIP> * iv_tdCtlr = nullptr; + + /** IMPE threshold counter. 
*/ + VcmFalseAlarm * iv_impeThresholdCounter = nullptr; + + #endif // __HOSTBOOT_MODULE + + public: // instance variables + + MemCeTable<TARGETING::TYPE_OCMB_CHIP> iv_ceTable; ///< CE table for FFDC + MemUeTable iv_ueTable; ///< UE table for FFDC + + /** If there is a channel failure detected on this bus, there will be some + * required cleanup after analysis to mask off all further attentions from + * the bus. A channel failure could occur on either side of the bus and it + * is possible the cleanup function could be called in multiple + * PostAnalysis plugins depending on where the channel failure occurred. + * Since we only want to do one cleanup, we will use this variable to + * indicate if a cleanup is still required or has already been done. */ + bool iv_doChnlFailCleanup = false; + + #ifdef __HOSTBOOT_MODULE + + /** Threshold table for RCD parity errors. */ + TimeBasedThreshold iv_rcdParityTh = TimeBasedThreshold( getRcdParityTh() ); + + /** Threshold table for IUEs. Threshold per DIMM */ + std::map<uint8_t, TimeBasedThreshold> iv_iueTh; + + /** Bool to indicate if we've triggered a port fail because of IUEs. */ + bool iv_iuePortFail = false; + + #ifdef __HOSTBOOT_RUNTIME + + /** VCM false alarm counter. */ + VcmFalseAlarm * iv_vcmFalseAlarmCounter = nullptr; + + /** TPS false alarm counter. */ + TpsFalseAlarm * iv_tpsFalseAlarmCounter = nullptr; + + /** Set to true if mainline NCEs and TCEs should be permanently masked. This + * is checked at the end of targeted diagnostics before background + * scrubbing is resumed. */ + bool iv_maskMainlineNceTce = false; + + // These are used to limit the number of times a scrub command will stop + // on a UE or CE on a rank. This is to prevent potential flooding of + // maintenance UEs or CEs. The threshold will be 16 per rank for each. 
+ TimeBasedThreshold iv_ueStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS ); + TimeBasedThreshold iv_ceStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS );; + + // If we stop on a UE or a CE, we will need to store the rank that the + // error is on so that we can clear our respective thresholds if the + // next error we stop on is on a different rank. + MemRank iv_ceUeRank; + + #else // IPL only + + /** MNFG IPL CE statistics. */ + MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * iv_iplCeStats = nullptr; + + #endif + + #endif // __HOSTBOOT_MODULE + +}; + +/** + * @brief Wrapper function for the OcmbDataBundle. + * @param i_ocmbChip The OCMB chip. + * @return This MBA's data bundle. + */ +inline OcmbDataBundle * getOcmbDataBundle( ExtensibleChip * i_ocmbChip ) +{ + return static_cast<OcmbDataBundle *>(i_ocmbChip->getDataBundle()); +} + +} // end namespace PRDF + +#endif // __prdfOcmbDataBundle_H + diff --git a/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk b/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk index 087214ece..2ea0712d3 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk +++ b/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -51,6 +51,7 @@ prd_obj += prdfMemoryMru.o prd_obj += prdfMemUeTable.o prd_obj += prdfMemUtils.o prd_obj += prdfMemThresholds.o +prd_obj += prdfP9OcmbChipDomain.o # rule plugin related prd_rule_plugin += prdfP9Mca_common.o diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule index eea254545..d1a6bc290 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule @@ -241,7 +241,7 @@ group gMCACALFIR /** MCACALFIR[0] * A MBA recoverable error has occurred. */ - (rMCACALFIR, bit(0)) ? self_th_1; + (rMCACALFIR, bit(0)) ? nvdimm_self_th_1; /** MCACALFIR[1] * MBA Nonrecoverable Error @@ -251,7 +251,7 @@ group gMCACALFIR /** MCACALFIR[2] * Excessive refreshes to a single rank. */ - (rMCACALFIR, bit(2)) ? self_th_32perDay; + (rMCACALFIR, bit(2)) ? nvdimm_self_th_32perDay; /** MCACALFIR[3] * Err detected in the MBA debug WAT logic @@ -266,7 +266,7 @@ group gMCACALFIR /** MCACALFIR[5] * Calibration complete indication xout */ - (rMCACALFIR, bit(5)) ? self_th_32perDay; + (rMCACALFIR, bit(5)) ? nvdimm_self_th_32perDay; /** MCACALFIR[6] * Emergency Throttle @@ -279,7 +279,7 @@ group gMCACALFIR (rMCACALFIR, bit(7)) ? self_th_1; /** MCACALFIR[8] - * event_n active on DDR interface + * Active NVDIMM Attention */ (rMCACALFIR, bit(8)) ? analyzeNvdimms; @@ -533,7 +533,7 @@ group gMCAECCFIR /** MCAECCFIR[42] * SCOM_PARITY_CLASS_RECOVERABLE */ - (rMCAECCFIR, bit(42)) ? self_th_1; + (rMCAECCFIR, bit(42)) ? nvdimm_self_th_1; /** MCAECCFIR[43] * SCOM_PARITY_CLASS_UNRECOVERABLE @@ -548,7 +548,7 @@ group gMCAECCFIR /** MCAECCFIR[45] * WRITE_RMW_CE */ - (rMCAECCFIR, bit(45)) ? self_th_32perDay; + (rMCAECCFIR, bit(45)) ? nvdimm_self_th_32perDay; /** MCAECCFIR[46] * WRITE_RMW_UE @@ -686,12 +686,12 @@ group gDDRPHYFIR /** DDRPHYFIR[60] * Register PE 4 bit impact */ - (rDDRPHYFIR, bit(60)) ? self_th_1; + (rDDRPHYFIR, bit(60)) ? 
nvdimm_self_th_1; /** DDRPHYFIR[61] * Register PE 1 bit impact */ - (rDDRPHYFIR, bit(61)) ? self_th_1; + (rDDRPHYFIR, bit(61)) ? nvdimm_self_th_1; }; diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule index da3a73f82..6d5ab9018 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule @@ -70,6 +70,7 @@ actionclass rcd_parity_error calloutSelfLowNoGard; # Self LOW # Thresholding done in plugin funccall("RcdParityError"); # Run TPS on TH for all MCA ranks + funccall("ClearNvdimmGardState"); # Clear gard for NVDIMMs }; /** Handle Mainline IUEs */ @@ -125,7 +126,7 @@ actionclass maintenance_iaue_handling /** MCA/UE algroithm, threshold 5 per day */ actionclass mca_ue_algorithm_th_5perDay { - calloutSelfMed; + try( funccall("CheckForNvdimms"), calloutSelfMed ); threshold5pday; funccall("mcaUeAlgorithm"); # must be called last }; @@ -133,12 +134,29 @@ actionclass mca_ue_algorithm_th_5perDay /** MCA/UE algroithm, threshold 1 */ actionclass mca_ue_algorithm_th_1 { - calloutSelfMed; + try( funccall("CheckForNvdimms"), calloutSelfMed ); threshold1; funccall("mcaUeAlgorithm"); # must be called last }; ################################################################################ +# NVDIMM callouts # +################################################################################ + +# Simple callouts that will avoid gard for NVDIMMs at IPL +actionclass nvdimm_self_th_1 +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold1; +}; + +actionclass nvdimm_self_th_32perDay +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold32pday; +}; + +################################################################################ # Analyze groups ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule 
b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule index 1f61719a7..0a3301e2a 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -599,7 +599,7 @@ group gMCBISTFIR /** MCBISTFIR[13] * SCOM_RECOVERABLE_REG_PE */ - (rMCBISTFIR, bit(13)) ? self_th_1; + (rMCBISTFIR, bit(13)) ? nvdimm_self_th_1; /** MCBISTFIR[14] * SCOM_FATAL_REG_PE diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule index 9b2127f3f..11d499e30 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -36,6 +36,17 @@ actionclass command_addr_timeout funccall("commandAddrTimeout"); }; +################################################################################ +# NVDIMM callouts # +################################################################################ + +# Simple callouts that will avoid gard for NVDIMMs at IPL +actionclass nvdimm_self_th_1 +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold1; +}; + ############################################################################### # Analyze groups ############################################################################### diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule index 71a0342ab..987d68afb 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -148,7 +148,7 @@ group gMCFIR /** MCFIR[0] * mc internal recoverable eror */ - (rMCFIR, bit(0)) ? self_th_1; + (rMCFIR, bit(0)) ? nvdimm_self_th_1; /** MCFIR[1] * mc internal non recovervable error diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule index 1497cdccb..35339ccc6 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -24,6 +24,17 @@ # IBM_PROLOG_END_TAG ################################################################################ +# NVDIMM callouts # +################################################################################ + +# Simple callouts that will avoid gard for NVDIMMs at IPL +actionclass nvdimm_self_th_1 +{ + try( funccall("CheckForNvdimms"), calloutSelfMed ); + threshold1; +}; + +################################################################################ # Analyze groups ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule index a4ce0d02d..790537acf 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_obus.rule @@ -469,12 +469,12 @@ group gIOOLFIR /** IOOLFIR[8] * link0 nak received */ - (rIOOLFIR, bit(8)) ? defaultMaskedError; + (rIOOLFIR, bit(8)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[9] * link1 nak received */ - (rIOOLFIR, bit(9)) ? defaultMaskedError; + (rIOOLFIR, bit(9)) ? threshold_and_mask_self_non_smp_only; /** IOOLFIR[10] * link0 replay buffer full @@ -499,22 +499,22 @@ group gIOOLFIR /** IOOLFIR[14] * link0 sl ecc correctable */ - (rIOOLFIR, bit(14)) ? threshold_and_mask_self; + (rIOOLFIR, bit(14)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[15] * link1 sl ecc correctable */ - (rIOOLFIR, bit(15)) ? threshold_and_mask_self; + (rIOOLFIR, bit(15)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[16] * link0 sl ecc ue */ - (rIOOLFIR, bit(16)) ? threshold_and_mask_self; + (rIOOLFIR, bit(16)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[17] * link1 sl ecc ue */ - (rIOOLFIR, bit(17)) ? threshold_and_mask_self; + (rIOOLFIR, bit(17)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[18] * link0 retrain threshold @@ -597,12 +597,12 @@ group gIOOLFIR (rIOOLFIR, bit(33)) ? 
defaultMaskedError; /** IOOLFIR[34] - * link0 num replay + * link0 num replay or no forward progress */ (rIOOLFIR, bit(34)) ? defaultMaskedError; /** IOOLFIR[35] - * link1 num replay + * link1 num replay or no forward progress */ (rIOOLFIR, bit(35)) ? defaultMaskedError; @@ -619,12 +619,12 @@ group gIOOLFIR /** IOOLFIR[38] * link0 prbs select error */ - (rIOOLFIR, bit(38)) ? threshold_and_mask_self; + (rIOOLFIR, bit(38)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[39] * link1 prbs select error */ - (rIOOLFIR, bit(39)) ? threshold_and_mask_self; + (rIOOLFIR, bit(39)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[40] * link0 tcomplete bad @@ -639,102 +639,102 @@ group gIOOLFIR /** IOOLFIR[42] * link0 no spare lane available */ - (rIOOLFIR, bit(42)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(42)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[43] * link1 no spare lane available */ - (rIOOLFIR, bit(43)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(43)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[44] - * link0 spare done + * link0 spare done or degraded mode */ - (rIOOLFIR, bit(44)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(44)) ? spare_lane_degraded_mode_L0; /** IOOLFIR[45] - * link1 spare done + * link1 spare done or degraded mode */ - (rIOOLFIR, bit(45)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(45)) ? spare_lane_degraded_mode_L1; /** IOOLFIR[46] * link0 too many crc errors */ - (rIOOLFIR, bit(46)) ? obusSmpCallout_L0; + (rIOOLFIR, bit(46)) ? obusSmpCallout_L0_smp_only; /** IOOLFIR[47] * link1 too many crc errors */ - (rIOOLFIR, bit(47)) ? obusSmpCallout_L1; + (rIOOLFIR, bit(47)) ? obusSmpCallout_L1_smp_only; /** IOOLFIR[48] - * link0 npu error + * link0 npu error or orx otx dlx errors */ (rIOOLFIR, bit(48)) ? threshold_and_mask_self; /** IOOLFIR[49] - * link1 npu error + * link1 npu error or orx otx dlx errors */ (rIOOLFIR, bit(49)) ? threshold_and_mask_self; /** IOOLFIR[50] * linkx npu error */ - (rIOOLFIR, bit(50)) ? threshold_and_mask_self; + (rIOOLFIR, bit(50)) ? 
threshold_and_mask_self_smp_only; /** IOOLFIR[51] * osc switch */ - (rIOOLFIR, bit(51)) ? threshold_and_mask_self; + (rIOOLFIR, bit(51)) ? threshold_and_mask_self_smp_only; /** IOOLFIR[52] * link0 correctable array error */ - (rIOOLFIR, bit(52)) ? obusSmpCallout_th32_L0; + (rIOOLFIR, bit(52)) ? self_th_32perDay; /** IOOLFIR[53] * link1 correctable array error */ - (rIOOLFIR, bit(53)) ? obusSmpCallout_th32_L1; + (rIOOLFIR, bit(53)) ? self_th_32perDay; /** IOOLFIR[54] * link0 uncorrectable array error */ - (rIOOLFIR, bit(54)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(54)) ? self_th_1; /** IOOLFIR[55] * link1 uncorrectable array error */ - (rIOOLFIR, bit(55)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(55)) ? self_th_1; /** IOOLFIR[56] * link0 training failed */ - (rIOOLFIR, bit(56)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(56)) ? training_failure_L0; /** IOOLFIR[57] * link1 training failed */ - (rIOOLFIR, bit(57)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(57)) ? training_failure_L1; /** IOOLFIR[58] * link0 unrecoverable error */ - (rIOOLFIR, bit(58)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(58)) ? unrecoverable_error_L0; /** IOOLFIR[59] * link1 unrecoverable error */ - (rIOOLFIR, bit(59)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(59)) ? unrecoverable_error_L1; /** IOOLFIR[60] * link0 internal error */ - (rIOOLFIR, bit(60)) ? obusSmpFailure_L0; + (rIOOLFIR, bit(60)) ? internal_error_L0; /** IOOLFIR[61] * link1 internal error */ - (rIOOLFIR, bit(61)) ? obusSmpFailure_L1; + (rIOOLFIR, bit(61)) ? 
internal_error_L1; /** IOOLFIR[62] * fir scom err dup diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule index 6ac3bc5a1..6712a5977 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -2872,7 +2872,7 @@ group gNXCQFIR /** NXCQFIR[19] * Uncorrectable error on ERAT arrays */ - (rNXCQFIR, bit(19)) ? nx_th_32perDay; + (rNXCQFIR, bit(19)) ? nx_th_1; /** NXCQFIR[20] * SUE on ERAT arrays diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule index 826308710..1960da53b 100644 --- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule +++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_proc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -23,9 +23,15 @@ # # IBM_PROLOG_END_TAG -############################################################################### +################################################################################ +# Analyze +################################################################################ + +actionclass analyzeENHCAFIR { analyze(gENHCAFIR); }; + +################################################################################ # Analyze connected -############################################################################### +################################################################################ actionclass analyzeConnectedMCBIST0 { analyze(connected(TYPE_MCBIST, 0)); }; actionclass analyzeConnectedMCBIST1 { analyze(connected(TYPE_MCBIST, 1)); }; diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule index 669d3e5b5..2e7e32869 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -208,6 +208,12 @@ actionclass parent_proc_th_1 threshold1; }; +actionclass parent_proc_th_32perDay +{ + callout(connected(TYPE_PROC), MRU_MED); + threshold32pday; +}; + actionclass level2_M_proc_L_th_1 { callout2ndLvlMed; @@ -273,4 +279,3 @@ actionclass chip_to_chip calloutSelfMed; threshold1; }; - diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule index 6590bb122..700e87649 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. # # @@ -88,6 +88,150 @@ actionclass obusSmpFailure_L1 threshold1; }; +actionclass smp_masked +{ + # If SMP mode, does defaultMaskedError action and returns SUCCESS. + # Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + funccall( "smp_masked" ); # If SMP mode +}; + +actionclass non_smp_masked +{ + # If NOT in SMP mode, does defaultMaskedError action and returns SUCCESS. + # Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + funccall( "non_smp_masked" ); +}; + +actionclass non_smp_callout_bus_th_1 +{ + # NOTE: We cannot put the threshold action in this actionclass because it + # will affect the SMP action in the try() statement. Therefore, the + # plugin must handle the thresholding if in non-SMP mode. 
+ + # If NOT in SMP mode: + # - calls out this OBUS + # - indicates the probably may be somewhere between this OBUS and whatever + # is on the other side (which we know nothing about) + # - sets threshold + # - sets service call + # - returns SUCCESS + # Otherwise + # - returns PRD_SCAN_COMM_REGISTER_ZERO + funccall( "non_smp_callout_bus_th_1" ); +}; + +actionclass non_smp_callout_lvl2_th_1 +{ + # NOTE: We cannot put the threshold action in this actionclass because it + # will affect the SMP action in the try() statement. Therefore, the + # plugin must handle the thresholding if in non-SMP mode. + + # If NOT in SMP mode: + # - calls out level 2 support + # - sets threshold + # - sets service call + # - returns SUCCESS + # Otherwise + # - returns PRD_SCAN_COMM_REGISTER_ZERO + funccall( "non_smp_callout_lvl2_th_1" ); +}; + +actionclass non_smp_callout_self_th_32perDay +{ + threshold32pday; # This is ok because it is greater than threshold1. + + # If NOT in SMP mode: + # - calls out this OBUS + # - returns SUCCESS + # Otherwise + # - returns PRD_SCAN_COMM_REGISTER_ZERO + funccall( "non_smp_callout_self" ); +}; + +actionclass threshold_and_mask_self_non_smp_only +{ + # SMP: masked + # Non-SMP: threshold_and_mask_self + try ( smp_masked, threshold_and_mask_self ); +}; + +actionclass threshold_and_mask_self_smp_only +{ + # SMP: threshold_and_mask_self + # Non-SMP: masked + try ( non_smp_masked, threshold_and_mask_self ); +}; + +actionclass obusSmpCallout_L0_smp_only +{ + # SMP: obusSmpCallout_L0 + # Non-SMP: masked + try ( non_smp_masked, obusSmpCallout_L0 ); +}; + +actionclass obusSmpCallout_L1_smp_only +{ + # SMP: obusSmpCallout_L1 + # Non-SMP: masked + try ( non_smp_masked, obusSmpCallout_L1 ); +}; + +actionclass spare_lane_degraded_mode_L0 +{ + # SMP: obusSmpCallout_th32_L0 (lane spare) + # Non-SMP: non_smp_callout_bus_th_1 (degraded mode) + try ( non_smp_callout_bus_th_1, obusSmpCallout_th32_L0 ); +}; + +actionclass spare_lane_degraded_mode_L1 +{ + # SMP: 
obusSmpCallout_th32_L1 (lane spare) + # Non-SMP: non_smp_callout_bus_th_1 (degraded mode) + try ( non_smp_callout_bus_th_1, obusSmpCallout_th32_L1 ); +}; + +actionclass training_failure_L0 +{ + # SMP: obusSmpFailure_L0 + # Non-SMP: non_smp_callout_lvl2_th_1 + try ( non_smp_callout_lvl2_th_1, obusSmpFailure_L0 ); +}; + +actionclass training_failure_L1 +{ + # SMP: obusSmpFailure_L1 + # Non-SMP: non_smp_callout_lvl2_th_1 + try ( non_smp_callout_lvl2_th_1, obusSmpFailure_L1 ); +}; + +actionclass unrecoverable_error_L0 +{ + # SMP: obusSmpFailure_L0 + # Non-SMP: non_smp_callout_bus_th_1 + try ( non_smp_callout_bus_th_1, obusSmpFailure_L0 ); +}; + +actionclass unrecoverable_error_L1 +{ + # SMP: obusSmpFailure_L1 + # Non-SMP: non_smp_callout_bus_th_1 + try ( non_smp_callout_bus_th_1, obusSmpFailure_L1 ); +}; + +actionclass internal_error_L0 +{ + # SMP: obusSmpFailure_L0 + # Non-SMP: non_smp_callout_self_th_32perDay + try ( non_smp_callout_self_th_32perDay, obusSmpFailure_L0 ); +}; + +actionclass internal_error_L1 +{ + # SMP: obusSmpFailure_L1 + # Non-SMP: non_smp_callout_self_th_32perDay + try ( non_smp_callout_self_th_32perDay, obusSmpFailure_L1 ); +}; + ############################################################################### # Analyze groups ############################################################################### diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule index 461fbc664..bc25fba5d 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_obus_regs.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. 
# # @@ -85,6 +85,13 @@ capture group default; }; + register MISC_ERROR_STATUS + { + name "P9 OBUS target Misc Error Status register"; + scomaddr 0x09010829; + capture group default; + }; + ############################################################################ # P9 OBUS targets for cable FFDC # One additional reg (IOOLFIR) is in default group diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule index aacf978bd..e5700c34b 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_proc_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2017,2018 +# Contributors Listed Below - COPYRIGHT 2017,2019 # [+] International Business Machines Corp. # # @@ -670,7 +670,6 @@ actionclass analyzePBIOOFIR { analyze(gPBIOOFIR ); }; actionclass analyzePBAFIR { analyze(gPBAFIR ); }; actionclass analyzePSIHBFIR { analyze(gPSIHBFIR ); }; actionclass analyzePBAMFIR { analyze(gPBAMFIR ); }; -actionclass analyzeENHCAFIR { analyze(gENHCAFIR ); }; actionclass analyzeXB_LFIR { analyze(gXB_LFIR ); }; actionclass analyzeXBPPEFIR { analyze(gXBPPEFIR ); }; diff --git a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C index ece3fc1a8..730f99f09 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C @@ -127,6 +127,88 @@ PRDF_PLUGIN_DEFINE_NS(nimbus_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo PRDF_PLUGIN_DEFINE_NS(cumulus_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo); PRDF_PLUGIN_DEFINE_NS(axone_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo); +/** + * @brief Will change the gard state of any NVDIMMs in the callout list to + * NO_GARD. + * @param i_chip The chip. + * @param io_sc The step code data struct. 
+ * @returns SUCCESS + */ +int32_t ClearNvdimmGardState( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #ifdef __HOSTBOOT_MODULE + + // Call the sdc to clear the NVDIMM mru list. + io_sc.service_data->clearNvdimmMruListGard(); + + #endif + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE_NS(nimbus_mca, CommonPlugins, ClearNvdimmGardState); + +/** + * @brief Will check if any of the DIMMs connected to this chip are NVDIMMs + * and send a message to PHYP/Hostboot that save/restore may work. If + * we are at IPL, we will callout self no gard instead of garding. + * @param i_chip The chip of the DIMM parent. + * @param io_sc The step code data struct. + * @returns SUCCESS if NVDIMMs found at IPL, PRD_SCAN_COMM_REGISTER_ZERO if not. + */ +int32_t CheckForNvdimms( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + int32_t rc = PRD_SCAN_COMM_REGISTER_ZERO; + + #ifdef CONFIG_NVDIMM + #ifdef __HOSTBOOT_MODULE + + TargetHandleList dimmList = getConnected( i_chip->getTrgt(), TYPE_DIMM ); + + // Always loop through all the dimms so we send the + // nvdimmNotifyProtChange message for all the NVDIMMs on the target. + for ( auto & dimm : dimmList ) + { + // If the callout target is an NVDIMM send a message to + // PHYP/Hostboot that a save/restore may work, and if we are at + // IPL, do not gard the target. + if ( isNVDIMM(dimm) ) + { + // Send the message to PHYP/Hostboot + uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( dimm, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) + { + PRDF_TRAC( "CheckForNvdimms: nvdimmNotifyProtChange(0x%08x)" + " failed.", PlatServices::getHuid(dimm) ); + continue; + } + + #ifndef __HOSTBOOT_RUNTIME + // IPL + // We will callout self, no gard. No need for another self callout + // from the rule code, so return SUCCESS. 
+ rc = SUCCESS; + #endif + } + } + + if ( SUCCESS == rc ) + { + // Callout self, no gard + io_sc.service_data->SetCallout( i_chip->getTrgt(), MRU_MED, NO_GARD ); + } + + #endif // __HOSTBOOT_MODULE + #endif // CONFIG_NVDIMM + + return rc; +} +PRDF_PLUGIN_DEFINE_NS(nimbus_mcs, CommonPlugins, CheckForNvdimms); +PRDF_PLUGIN_DEFINE_NS(nimbus_mca, CommonPlugins, CheckForNvdimms); +PRDF_PLUGIN_DEFINE_NS(nimbus_mcbist, CommonPlugins, CheckForNvdimms); + } // namespace CommonPlugins ends }// namespace PRDF ends diff --git a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C index 6cb4e6535..6ad889fd5 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.C @@ -75,6 +75,16 @@ TargetHandle_t getTxBusEndPt( TargetHandle_t i_rxTrgt) // grab connected DMI parent o_txTrgt = getConnectedParent( i_rxTrgt, TYPE_DMI ); } + else if ( TYPE_OMI == busType ) + { + // Get connected child OCMB (one OCMB per OMI) + o_txTrgt = getConnectedChild( i_rxTrgt, TYPE_OCMB_CHIP, 0 ); + } + else if ( TYPE_OCMB_CHIP == busType ) + { + // Get connected parent OMI + o_txTrgt = getConnectedParent( i_rxTrgt, TYPE_OMI ); + } PRDF_ASSERT(nullptr != o_txTrgt); return o_txTrgt; @@ -310,38 +320,6 @@ int32_t __handleLaneRepairEvent( ExtensibleChip * i_chip, #undef PRDF_FUNC } -template<> -int32_t __handleLaneRepairEvent<TYPE_OBUS, TYPE_OBUS>( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc, - bool i_spareDeployed ) -{ - TargetHandle_t rxBusTgt = i_chip->getTrgt(); - - // Make predictive on first occurrence in MFG - if ( isLaneRepairDisabled<TYPE_OBUS>() ) - { - i_sc.service_data->setServiceCall(); - } - - // RTC 174485 - // Need HWPs for this. Just callout bus interface for now. 
- if ( obusInSmpMode(rxBusTgt) ) - { - calloutBusInterface( i_chip, i_sc, MRU_LOW ); - i_sc.service_data->setServiceCall(); - } - else - { - PRDF_ERR( "__handleLaneRepairEvent: Lane repair only supported " - "in SMP mode obus: 0x%08x", getHuid(rxBusTgt) ); - i_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); - i_sc.service_data->SetCallout( SP_CODE, MRU_MED, NO_GARD ); - i_sc.service_data->setServiceCall(); - } - return SUCCESS; -} - - int32_t handleLaneRepairEvent( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & i_sc, bool i_spareDeployed ) @@ -350,10 +328,6 @@ int32_t handleLaneRepairEvent( ExtensibleChip * i_chip, TYPE trgtType = getTargetType(i_chip->getTrgt()); switch (trgtType) { - case TYPE_OBUS: - rc = __handleLaneRepairEvent<TYPE_OBUS,TYPE_OBUS>( i_chip, i_sc, - i_spareDeployed ); - break; case TYPE_XBUS: rc = __handleLaneRepairEvent<TYPE_XBUS,TYPE_XBUS>( i_chip, i_sc, i_spareDeployed ); @@ -729,6 +703,8 @@ void obus_clearMaskFail( errlHndl_t &io_errl, TargetHandle_t &i_rxTrgt, PRDF_ASSERT( NULL != i_txTrgt ); PRDF_ASSERT( NULL != io_errl ); +#ifdef __HOSTBOOT_MODULE // register writes not allowed on FSP + uint32_t l_rc = SUCCESS; ExtensibleChip *l_rxChip = (ExtensibleChip *)systemPtr->GetChip( i_rxTrgt ); @@ -790,6 +766,8 @@ void obus_clearMaskFail( errlHndl_t &io_errl, TargetHandle_t &i_rxTrgt, } while (0); +#endif // __HOSTBOOT_MODULE + } // end obus_clearMaskFail @@ -924,7 +902,7 @@ PRDF_PLUGIN_DEFINE_NS( cumulus_proc, LaneRepair, captureSmpObus3 ); PRDF_PLUGIN_DEFINE_NS( nimbus_proc, LaneRepair, captureSmpObus3 ); PRDF_PLUGIN_DEFINE_NS( axone_proc, LaneRepair, captureSmpObus3 ); -int32_t calloutBusInterface( ExtensibleChip * i_chip, +int32_t calloutBusInterface( TargetHandle_t i_rxTrgt, STEP_CODE_DATA_STRUCT & i_sc, PRDpriority i_priority ) { @@ -934,10 +912,9 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, do { // Get both endpoints - TargetHandle_t rxTrgt = i_chip->getTrgt(); - TYPE rxType = getTargetType(rxTrgt); + TYPE 
rxType = getTargetType(i_rxTrgt); - if ( rxType == TYPE_OBUS && !obusInSmpMode( rxTrgt ) ) + if ( rxType == TYPE_OBUS && !obusInSmpMode( i_rxTrgt ) ) { // There is no support in hostboot for calling out the other end of // an NV or openCAPI bus. By design, any FIR bits associated with @@ -945,7 +922,7 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, // action. So if we hit this case, just make a default callout. PRDF_ERR( PRDF_FUNC "Lane repair only supported in SMP mode " - "obus: 0x%08x", getHuid(rxTrgt) ); + "obus: 0x%08x", getHuid(i_rxTrgt) ); i_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); i_sc.service_data->SetCallout( SP_CODE, MRU_MED, NO_GARD ); @@ -953,11 +930,11 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, break; } - TargetHandle_t txTrgt = getTxBusEndPt(rxTrgt); + TargetHandle_t txTrgt = getTxBusEndPt(i_rxTrgt); TYPE txType = getTargetType(txTrgt); // Add the endpoint target callouts - i_sc.service_data->SetCallout( rxTrgt, MRU_MEDA ); + i_sc.service_data->SetCallout( i_rxTrgt, MRU_MEDA ); i_sc.service_data->SetCallout( txTrgt, MRU_MEDA); // Get the HWAS bus type. @@ -975,6 +952,11 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, { hwasType = HWAS::DMI_BUS_TYPE; } + else if ( (TYPE_OMI == rxType && TYPE_OCMB_CHIP == txType) || + (TYPE_OCMB_CHIP == rxType && TYPE_OMI == txType) ) + { + hwasType = HWAS::OMI_BUS_TYPE; + } else { PRDF_ASSERT( false ); @@ -990,7 +972,7 @@ int32_t calloutBusInterface( ExtensibleChip * i_chip, } // Callout this bus interface. 
- PRDF_ADD_BUS_CALLOUT( errl, rxTrgt, txTrgt, hwasType, i_priority ); + PRDF_ADD_BUS_CALLOUT( errl, i_rxTrgt, txTrgt, hwasType, i_priority ); } while(0); @@ -1020,9 +1002,6 @@ int32_t spareDeployed( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, spareDeployed ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, spareDeployed ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, spareDeployed ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, spareDeployed ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, spareDeployed ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, spareDeployed ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, spareDeployed ); /** @@ -1042,9 +1021,6 @@ int32_t maxSparesExceeded( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, maxSparesExceeded ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, maxSparesExceeded ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, maxSparesExceeded ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, maxSparesExceeded ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, maxSparesExceeded ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, maxSparesExceeded ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, maxSparesExceeded ); /** @@ -1064,9 +1040,6 @@ int32_t tooManyBusErrors( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, tooManyBusErrors ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, tooManyBusErrors ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, tooManyBusErrors ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, tooManyBusErrors ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, tooManyBusErrors ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, tooManyBusErrors ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, tooManyBusErrors ); /** @@ -1078,18 +1051,53 @@ PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, tooManyBusErrors ); int32_t calloutBusInterfacePlugin( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc 
) { - calloutBusInterface(i_chip, io_sc, MRU_LOW); + calloutBusInterface(i_chip->getTrgt(), io_sc, MRU_LOW); return SUCCESS; } PRDF_PLUGIN_DEFINE_NS( nimbus_xbus, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( cumulus_xbus, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( axone_xbus, LaneRepair, calloutBusInterfacePlugin ); -PRDF_PLUGIN_DEFINE_NS( nimbus_obus, LaneRepair, calloutBusInterfacePlugin ); -PRDF_PLUGIN_DEFINE_NS( cumulus_obus, LaneRepair, calloutBusInterfacePlugin ); -PRDF_PLUGIN_DEFINE_NS( axone_obus, LaneRepair, calloutBusInterfacePlugin ); +PRDF_PLUGIN_DEFINE_NS( explorer_ocmb, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( cumulus_dmi, LaneRepair, calloutBusInterfacePlugin ); PRDF_PLUGIN_DEFINE_NS( centaur_membuf, LaneRepair, calloutBusInterfacePlugin ); +/** + * @brief Add callouts for a BUS interface inputting an OMIC or MCC target + * @param i_chip OMIC/MCC chip + * @param io_sc Step code data struct. + * @param i_pos The position of the OMI relative to the OMIC/MCC. 
+ * @return SUCCESS always + */ + +int32_t omiParentCalloutBusInterfacePlugin( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc, + uint8_t i_pos ) +{ + TargetHandle_t omi = getConnectedChild(i_chip->getTrgt(), TYPE_OMI, i_pos); + TargetHandle_t ocmb = getConnectedChild( omi, TYPE_OCMB_CHIP, 0 ); + + // Callout both ends of the bus as well (OMI and OCMB) + io_sc.service_data->SetCallout( omi, MRU_MEDA ); + io_sc.service_data->SetCallout( ocmb, MRU_MEDA ); + + calloutBusInterface(omi, io_sc, MRU_LOW); + return SUCCESS; +} + +#define OMI_PARENT_CALL_BUS_PLUGIN( POS ) \ +int32_t omiParentCalloutBusInterfacePlugin_##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + return omiParentCalloutBusInterfacePlugin( i_chip, io_sc, POS ); \ +} \ +PRDF_PLUGIN_DEFINE_NS( axone_omic, LaneRepair, \ + omiParentCalloutBusInterfacePlugin_##POS );\ +PRDF_PLUGIN_DEFINE_NS( axone_mcc, LaneRepair, \ + omiParentCalloutBusInterfacePlugin_##POS ); + +OMI_PARENT_CALL_BUS_PLUGIN( 0 ); +OMI_PARENT_CALL_BUS_PLUGIN( 1 ); +OMI_PARENT_CALL_BUS_PLUGIN( 2 ); //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H index afc834e29..3f5a3f33c 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H +++ b/src/usr/diag/prdf/common/plat/p9/prdfLaneRepair.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2017 */ +/* Contributors Listed Below - COPYRIGHT 2017,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -56,12 +56,12 @@ int32_t handleLaneRepairEvent (ExtensibleChip * i_chip, /** * @brief Will add target bus interface endpoints and all parts in between the * endpoints to the global error log in RasServices. - * @param i_chip RX-side chip of bus interface - * @param i_sc The step code data struct. 
+ * @param i_rxTrgt RX-side target of bus interface + * @param i_sc The step code data struct. * @param i_priority Callout priority (default MRU_LOW). * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ -int32_t calloutBusInterface( ExtensibleChip * i_chip, +int32_t calloutBusInterface( TARGETING::TargetHandle_t i_rxTrgt, STEP_CODE_DATA_STRUCT & i_sc, PRDpriority i_priority = MRU_LOW ); diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C b/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C index e37cffcd3..7c3033dc2 100755 --- a/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9Configurator.C @@ -243,7 +243,7 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, { errlHndl_t errl = nullptr; - std::map<TARGETING::MODEL, std::map<TARGETING::TYPE, const char *>> fnMap = + std::map<uint32_t, std::map<TARGETING::TYPE, const char *>> fnMap = { { MODEL_NIMBUS, { { TYPE_PROC, nimbus_proc }, { TYPE_EQ, nimbus_eq }, @@ -285,7 +285,14 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, { TYPE_MI, axone_mi }, { TYPE_MCC, axone_mcc }, { TYPE_OMIC, axone_omic }, } }, - { MODEL_EXPLORER, { { TYPE_OCMB_CHIP, explorer_ocmb }, } }, + #ifdef __HOSTBOOT_MODULE + { POWER_CHIPID::EXPLORER_16, { { TYPE_OCMB_CHIP, explorer_ocmb }, } }, + #endif + // OCMB is not supported on FSP, however we need support here for the + // MODEL_OCMB model for our simulator to work. + #ifdef ESW_SIM_COMPILE + { MODEL_OCMB, { { TYPE_OCMB_CHIP, explorer_ocmb }, } }, + #endif }; // Get references to factory objects. @@ -299,7 +306,19 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, // Iterate all the targets for this type and add to given domain. 
for ( const auto & trgt : getFunctionalTargetList(i_type) ) { - TARGETING::MODEL model = getChipModel( trgt ); + uint32_t model = getChipModel( trgt ); + + #ifdef __HOSTBOOT_MODULE + // Special case for OCMBs (hostboot only issue for P9). + if ( MODEL_OCMB == model ) + { + // Use the chip ID instead of model. + model = getChipId( trgt ); + + // Skip Gemini OCMBs. They can exist, but PRD won't support them. + if ( POWER_CHIPID::GEMINI_16 == model ) continue; + } + #endif // Ensure this model is supported. if ( fnMap.end() == fnMap.find(model) ) @@ -350,8 +369,6 @@ errlHndl_t PlatConfigurator::addDomainChips( TARGETING::TYPE i_type, scanFac, resFac ); break; - // TODO RTC 199020 - add the pll domains for axone - default: ; } } diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C b/src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C new file mode 100644 index 000000000..6117c6edc --- /dev/null +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C @@ -0,0 +1,193 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/p9/prdfP9Obus.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace obus +{ + +//############################################################################## +// +// IOOLFIR +// +//############################################################################## + +/** + * @brief If OBUS is in SMP mode, does defaultMaskedError actions and returns + * SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t smp_masked( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: This attention should be masked. + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } + else + { + // Non-SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, smp_masked ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, smp_masked ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, smp_masked ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, does defaultMaskedError actions and + * returns SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_masked( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: This attention should be masked. 
+ io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_masked ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_masked ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_masked ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, calls out this bus on first occurrence and + * returns SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_callout_bus_th_1( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: Callout this bus. Note that Hostboot does not know what + // is on the other side of this bus and does not have any control over + // garding/deconfiguring. Therefore, we cannot gard since we will never + // know if the other side of the bus has been replaced. Also, there is + // a small probability that the fault could be between the two + // endpoints. Usually, we would do a procedure callout or call some HWP + // that would take care of the "everything in between" scenario. + // However, there is no existing mechanism. For now callout level 2 + // support at low priority. 
+ io_sc.service_data->SetCallout( i_chip->getTrgt(), MRU_MED, NO_GARD ); + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_LOW, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_callout_bus_th_1 ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_callout_bus_th_1 ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_callout_bus_th_1 ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, calls out level 2 support on first + * occurrence and returns SUCCESS. Otherwise, returns + * PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_callout_lvl2_th_1( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. + return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: Callout this bus on first occurrence. + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_MED, NO_GARD ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_callout_lvl2_th_1 ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_callout_lvl2_th_1 ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_callout_lvl2_th_1 ); + +//------------------------------------------------------------------------------ + +/** + * @brief If OBUS is NOT in SMP mode, calls out this OBUS target and returns + * SUCCESS. Otherwise, returns PRD_SCAN_COMM_REGISTER_ZERO. + */ +int32_t non_smp_callout_self( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + if ( obusInSmpMode(i_chip->getTrgt()) ) + { + // SMP mode: Try some other action. 
+ return PRD_SCAN_COMM_REGISTER_ZERO; + } + else + { + // Non-SMP mode: Callout this OBUS target. + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + return SUCCESS; + } +} +PRDF_PLUGIN_DEFINE_NS( nimbus_obus, obus, non_smp_callout_self ); +PRDF_PLUGIN_DEFINE_NS( cumulus_obus, obus, non_smp_callout_self ); +PRDF_PLUGIN_DEFINE_NS( axone_obus, obus, non_smp_callout_self ); + +} // end namespace obus + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C new file mode 100644 index 000000000..2f6c25646 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C @@ -0,0 +1,78 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + +/** + * @file prdfP9OcmbChipDomain.C + * @brief chip Plug-in code for OCMB domain + */ + +#include <prdfP9OcmbChipDomain.H> + +// Framework includes +#include <prdfExtensibleChip.H> +#include <prdfPlatServices.H> +#include <prdfTrace.H> +#include <prdfOcmbDataBundle.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +#ifdef __HOSTBOOT_RUNTIME +void OcmbChipDomain::handleRrFo() +{ + #define PRDF_FUNC "[OcmbChipDomain::handleRrFo] " + + do + { + uint32_t domainSize = GetSize(); + // Iterate all OCMBs in the domain. + for ( uint32_t i = 0; i < domainSize; ++i ) + { + RuleChip * ocmbChip = LookUp(i); + + // Start background scrub if required. + OcmbDataBundle * ocmbdb = getOcmbDataBundle( ocmbChip ); + int32_t l_rc = ocmbdb->getTdCtlr()->handleRrFo(); + if ( SUCCESS != l_rc ) + { + // Let us not fail here. If problem is contained within an OCMB + // we will discover it again during normal TD procedures. + PRDF_ERR( PRDF_FUNC "handleRrFo() failed: OCMB=0x%08x", + ocmbChip->GetId() ); + continue; // Keep going. + } + } + + } while (0); + + #undef PRDF_FUNC +} +#endif + +} // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H index 5546d9453..9f5776cac 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9OcmbChipDomain.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -54,6 +54,16 @@ class OcmbChipDomain : public RuleChipDomain virtual bool Query( ATTENTION_TYPE i_attnType ) { return false; } + #ifdef __HOSTBOOT_RUNTIME + + /** + * @brief Starts memory background scrubbing or VCM procedure for OCMB + * during R/R and F/O if required. 
+ */ + void handleRrFo(); + + #endif + }; } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk b/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk index cb69cad14..64092650f 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk +++ b/src/usr/diag/prdf/common/plat/p9/prdf_plat_p9.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -56,4 +56,5 @@ prd_rule_plugin += prdfP9Eq.o prd_rule_plugin += prdfP9TodPlugins.o prd_rule_plugin += prdfP9Dmi_common.o prd_rule_plugin += prdfP9Mc_common.o +prd_rule_plugin += prdfP9Obus.o diff --git a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C index f99427d61..5cabaedc8 100644 --- a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C +++ b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C @@ -48,7 +48,6 @@ #include <p9_io_xbus_pdwn_lanes.H> #include <p9_io_xbus_clear_firs.H> #include <p9_io_erepairAccessorHwpFuncs.H> -#include <config.h> #include <p9_io_cen_read_erepair.H> #include <p9_io_cen_pdwn_lanes.H> #include <p9_io_dmi_read_erepair.H> @@ -695,6 +694,10 @@ uint32_t getBadDqBitmap( TargetHandle_t i_trgt, const MemRank & i_rank, o_rc = __getBadDqBitmap<fapi2::TARGET_TYPE_MEM_PORT>( i_trgt, i_rank, o_bitmap ); break; + case TYPE_OCMB_CHIP: + o_rc = __getBadDqBitmap<fapi2::TARGET_TYPE_OCMB_CHIP>( i_trgt, + i_rank, o_bitmap ); + break; default: PRDF_ERR( PRDF_FUNC "Invalid trgt type" ); o_rc = FAIL; @@ -777,6 +780,10 @@ uint32_t setBadDqBitmap( TargetHandle_t i_trgt, const MemRank & i_rank, o_rc = __setBadDqBitmap<fapi2::TARGET_TYPE_MEM_PORT>( i_trgt, i_rank, i_bitmap ); break; + case TYPE_OCMB_CHIP: + o_rc = __setBadDqBitmap<fapi2::TARGET_TYPE_OCMB_CHIP>( i_trgt, + i_rank, i_bitmap ); + break; default: PRDF_ERR( PRDF_FUNC "Invalid trgt type" ); o_rc = FAIL; @@ 
-872,6 +879,17 @@ void getDimmDqAttr<TYPE_MEM_PORT>( TargetHandle_t i_target, } // end function getDimmDqAttr template<> +void getDimmDqAttr<TYPE_OCMB_CHIP>( TargetHandle_t i_target, + uint8_t (&o_dqMapPtr)[DQS_PER_DIMM] ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_target) ); + + // TODO RTC 210072 - Support for multiple ports per OCMB + TargetHandle_t memPort = getConnectedChild( i_target, TYPE_MEM_PORT, 0 ); + getDimmDqAttr<TYPE_MEM_PORT>( memPort, o_dqMapPtr ); +} + +template<> void getDimmDqAttr<TYPE_DIMM>( TargetHandle_t i_target, uint8_t (&o_dqMapPtr)[DQS_PER_DIMM] ) { @@ -947,15 +965,15 @@ int32_t mssGetSteerMux<TYPE_MBA>( TargetHandle_t i_mba, const MemRank & i_rank, } template<> -int32_t mssGetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, - const MemRank & i_rank, - MemSymbol & o_port0Spare, - MemSymbol & o_port1Spare, - MemSymbol & o_eccSpare ) +int32_t mssGetSteerMux<TYPE_OCMB_CHIP>( TargetHandle_t i_ocmb, + const MemRank & i_rank, + MemSymbol & o_port0Spare, + MemSymbol & o_port1Spare, + MemSymbol & o_eccSpare ) { int32_t o_rc = SUCCESS; - /* TODO RTC 207273 - sparing support + /* TODO RTC 199032 - sparing support // called by FSP code so can't just move to hostboot side #ifdef __HOSTBOOT_MODULE @@ -963,7 +981,7 @@ int32_t mssGetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, uint8_t port0Spare, port1Spare, eccSpare; - fapi2::Target<fapi2::TARGET_TYPE_MEM_PORT> fapiPort(i_memPort); + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiPort(i_ocmb); FAPI_INVOKE_HWP( errl, mss_check_steering, fapiPort, i_rank.getMaster(), port0Spare, port1Spare, eccSpare ); @@ -971,15 +989,15 @@ int32_t mssGetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, { PRDF_ERR( "[PlatServices::mssGetSteerMux] mss_check_steering() " "failed. 
HUID: 0x%08x rank: %d", - getHuid(i_memPort), i_rank.getMaster() ); + getHuid(i_ocmb), i_rank.getMaster() ); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; } else { - o_port0Spare = MemSymbol::fromSymbol( i_memPort, i_rank, port0Spare ); - o_port1Spare = MemSymbol::fromSymbol( i_memPort, i_rank, port1Spare ); - o_eccSpare = MemSymbol::fromSymbol( i_memPort, i_rank, eccSpare ); + o_port0Spare = MemSymbol::fromSymbol( i_ocmb, i_rank, port0Spare ); + o_port1Spare = MemSymbol::fromSymbol( i_ocmb, i_rank, port1Spare ); + o_eccSpare = MemSymbol::fromSymbol( i_ocmb, i_rank, eccSpare ); } #endif */ @@ -1020,20 +1038,22 @@ int32_t mssSetSteerMux<TYPE_MBA>( TargetHandle_t i_mba, const MemRank & i_rank, } template<> -int32_t mssSetSteerMux<TYPE_MEM_PORT>( TargetHandle_t i_memPort, +int32_t mssSetSteerMux<TYPE_OCMB_CHIP>( TargetHandle_t i_memPort, const MemRank & i_rank, const MemSymbol & i_symbol, bool i_x4EccSpare ) { int32_t o_rc = SUCCESS; - /* TODO RTC 207273 - sparing support + /* TODO RTC 199032 - sparing support #ifdef __HOSTBOOT_MODULE errlHndl_t errl = NULL; fapi2::Target<fapi2::TARGET_TYPE_MEM_PORT> fapiPort(i_memPort); + TargetHandle_t dimm = getConnectedDimm( i_memPort, i_rank, + i_symbol.getPortSlct() ); uint8_t l_dramSymbol = PARSERUTILS::dram2Symbol<TYPE_MBA>( i_symbol.getDram(), - isDramWidthX4(i_memPort) ); + isDramWidthX4(dimm) ); FAPI_INVOKE_HWP( errl, mss_do_steering, fapiPort, i_rank.getMaster(), l_dramSymbol, @@ -1105,7 +1125,9 @@ int32_t getDimmSpareConfig<TYPE_MEM_PORT>( TargetHandle_t i_memPort, bool isFullByte = ( ENUM_ATTR_MEM_EFF_DIMM_SPARE_FULL_BYTE == o_spareConfig ); - bool isX4Dram = isDramWidthX4(i_memPort); + + TargetHandle_t dimm = getConnectedDimm( i_memPort, i_rank, i_ps ); + bool isX4Dram = isDramWidthX4(dimm); if ( ( isX4Dram && isFullByte ) || ( !isX4Dram && !isFullByte ) ) { @@ -1122,6 +1144,15 @@ int32_t getDimmSpareConfig<TYPE_MEM_PORT>( TargetHandle_t i_memPort, } template<> +int32_t getDimmSpareConfig<TYPE_OCMB_CHIP>( 
TargetHandle_t i_ocmb, + MemRank i_rank, uint8_t i_ps, uint8_t & o_spareConfig ) +{ + TargetHandle_t memPort = getConnectedChild( i_ocmb, TYPE_MEM_PORT, i_ps ); + return getDimmSpareConfig<TYPE_MEM_PORT>( memPort, i_rank, i_ps, + o_spareConfig ); +} + +template<> int32_t getDimmSpareConfig<TYPE_MBA>( TargetHandle_t i_mba, MemRank i_rank, uint8_t i_ps, uint8_t & o_spareConfig ) { @@ -1207,7 +1238,8 @@ uint32_t isDramSparingEnabled<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, do { - const bool isX4 = isDramWidthX4( i_trgt ); + TargetHandle_t dimm = getConnectedDimm( i_trgt, i_rank, i_ps ); + const bool isX4 = isDramWidthX4( dimm ); if ( isX4 ) { // Always an ECC spare in x4 mode. @@ -1216,9 +1248,7 @@ uint32_t isDramSparingEnabled<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, } // Check for any DRAM spares. - // TODO RTC 207273 - no TARGETING support for attr yet - //uint8_t cnfg = TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE; - uint8_t cnfg = 0; + uint8_t cnfg = TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE; o_rc = getDimmSpareConfig<TYPE_MEM_PORT>( i_trgt, i_rank, i_ps, cnfg ); if ( SUCCESS != o_rc ) { @@ -1226,9 +1256,7 @@ uint32_t isDramSparingEnabled<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, "failed", getHuid(i_trgt), i_rank.getKey(), i_ps ); break; } - // TODO RTC 207273 - no TARGETING support for attr yet - //o_spareEnable = (TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE; != cnfg); - o_spareEnable = (0 != cnfg); + o_spareEnable = (TARGETING::MEM_EFF_DIMM_SPARE_NO_SPARE != cnfg); }while(0); @@ -1303,12 +1331,22 @@ uint32_t isSpareAvailable( TARGETING::TargetHandle_t i_trgt, MemRank i_rank, if ( !dramSparingEnabled ) break; // Get the current spares in hardware + TargetHandle_t steerTrgt = i_trgt; MemSymbol sp0, sp1, ecc; - o_rc = mssGetSteerMux<T>( i_trgt, i_rank, sp0, sp1, ecc ); + if ( TYPE_MEM_PORT == T ) + { + steerTrgt = getConnectedParent( i_trgt, TYPE_OCMB_CHIP ); + o_rc = mssGetSteerMux<TYPE_OCMB_CHIP>( steerTrgt, i_rank, sp0, sp1, + ecc ); + } + else + { 
+ o_rc = mssGetSteerMux<T>( steerTrgt, i_rank, sp0, sp1, ecc ); + } if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed", - getHuid(i_trgt), i_rank.getKey() ); + getHuid(steerTrgt), i_rank.getKey() ); break; } @@ -1353,6 +1391,10 @@ template uint32_t isSpareAvailable<TYPE_MBA>( TARGETING::TargetHandle_t i_trgt, MemRank i_rank, uint8_t i_ps, bool & o_spAvail, bool & o_eccAvail ); +template +uint32_t isSpareAvailable<TYPE_MEM_PORT>( TARGETING::TargetHandle_t i_trgt, + MemRank i_rank, uint8_t i_ps, bool & o_spAvail, bool & o_eccAvail ); + //------------------------------------------------------------------------------ template<> diff --git a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H index 5d41d96e0..203703b42 100755 --- a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H +++ b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H @@ -193,7 +193,7 @@ bool obusInSmpMode(TARGETING::TargetHandle_t obusTgt); /** * @brief Reads the bad DQ bitmap attribute for both ports of the target rank. - * @param i_trgt A MCA/MBA/MEM_PORT target. + * @param i_trgt A MCA/MBA/MEM_PORT/OCMB_CHIP target. * @param i_rank Target rank. * @param o_bitmap DQ bitmap container. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. @@ -203,7 +203,7 @@ uint32_t getBadDqBitmap( TARGETING::TargetHandle_t i_trgt, /** * @brief Writes the bad DQ bitmap attribute for both ports of the target rank. - * @param i_trgt A MCA/MBA/MEM_PORT target. + * @param i_trgt A MCA/MBA/MEM_PORT/OCMB_CHIP target. * @param i_rank Target rank. * @param i_bitmap DQ bitmap container. * @note This is a no-op if DRAM Repairs are disabled in manufacturing. @@ -215,7 +215,7 @@ uint32_t setBadDqBitmap( TARGETING::TargetHandle_t i_trgt, /** * @brief Clears the bad DQ bitmap attribute for all ports of the target rank. - * @param i_trgt A MCA/MBA/MEM_PORT target. 
+ * @param i_trgt A MCA/MBA/MEM_PORT/OCMB_CHIP target. * @param i_rank Target rank. * @note This is a no-op if DRAM Repairs are disabled in manufacturing. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. diff --git a/src/usr/diag/prdf/common/plat/prdfRasServices_common.C b/src/usr/diag/prdf/common/plat/prdfRasServices_common.C index 3f9ba2322..2742286b3 100755 --- a/src/usr/diag/prdf/common/plat/prdfRasServices_common.C +++ b/src/usr/diag/prdf/common/plat/prdfRasServices_common.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -891,12 +891,21 @@ void ErrDataService::deallocateDimms( const SDC_MRU_LIST & i_mruList ) for ( SDC_MRU_LIST::const_iterator it = i_mruList.begin(); it != i_mruList.end(); ++it ) { + PRDcallout thiscallout = it->callout; if ( PRDcalloutData::TYPE_TARGET == thiscallout.getType() ) { TargetHandle_t calloutTgt = thiscallout.getTarget(); TYPE tgtType = getTargetType( calloutTgt ); + #ifdef CONFIG_NVDIMM + // If the MRU's gard policy is set to NO_GARD, skip it. + if ( NO_GARD == it->gardState && isNVDIMM(calloutTgt) ) + { + continue; + } + #endif + if ( TYPE_L4 == tgtType ) { calloutTgt = getConnectedParent( calloutTgt, TYPE_MEMBUF ); @@ -932,7 +941,17 @@ void ErrDataService::deallocateDimms( const SDC_MRU_LIST & i_mruList ) dimm != dimms.end(); ++dimm ) { if ( TYPE_DIMM == getTargetType(*dimm) ) + { + #ifdef CONFIG_NVDIMM + // If the MRU's gard policy is set to NO_GARD, skip it. 
+ if ( NO_GARD == it->gardState && isNVDIMM(*dimm) ) + { + continue; + } + #endif + dimmList.push_back(*dimm); + } } } } diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.C b/src/usr/diag/prdf/common/plat/prdfTargetServices.C index 65f8b9cdc..d34c980ad 100755 --- a/src/usr/diag/prdf/common/plat/prdfTargetServices.C +++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.C @@ -365,6 +365,20 @@ TARGETING::MODEL getChipModel( TARGETING::TargetHandle_t i_trgt ) //------------------------------------------------------------------------------ +#ifdef __HOSTBOOT_MODULE +uint32_t getChipId( TARGETING::TargetHandle_t i_trgt ) +{ + PRDF_ASSERT( NULL != i_trgt ); + + TargetHandle_t parent = getParentChip( i_trgt ); + PRDF_ASSERT( NULL != parent ); + + return parent->getAttr<ATTR_CHIP_ID>(); +} +#endif + +//------------------------------------------------------------------------------ + uint8_t getChipLevel( TARGETING::TargetHandle_t i_trgt ) { PRDF_ASSERT( NULL != i_trgt ); @@ -566,6 +580,7 @@ TargetService::ASSOCIATION_TYPE getAssociationType( TargetHandle_t i_target, { TYPE_MC, TYPE_PROC, TargetService::PARENT_BY_AFFINITY }, { TYPE_MC, TYPE_MI, TargetService::CHILD_BY_AFFINITY }, { TYPE_MC, TYPE_OMIC, TargetService::CHILD_BY_AFFINITY }, + { TYPE_MC, TYPE_MCC, TargetService::CHILD_BY_AFFINITY }, { TYPE_MC, TYPE_DMI, TargetService::CHILD_BY_AFFINITY }, { TYPE_MC, TYPE_DIMM, TargetService::CHILD_BY_AFFINITY }, @@ -579,13 +594,16 @@ TargetService::ASSOCIATION_TYPE getAssociationType( TargetHandle_t i_target, { TYPE_OMIC, TYPE_OMI, TargetService::CHILD_BY_AFFINITY }, { TYPE_MCC, TYPE_PROC, TargetService::PARENT_BY_AFFINITY }, + { TYPE_MCC, TYPE_MC, TargetService::PARENT_BY_AFFINITY }, { TYPE_MCC, TYPE_MI, TargetService::PARENT_BY_AFFINITY }, { TYPE_MCC, TYPE_OMI, TargetService::CHILD_BY_AFFINITY }, + { TYPE_MCC, TYPE_OCMB_CHIP, TargetService::CHILD_BY_AFFINITY }, { TYPE_OMI, TYPE_OMIC, TargetService::PARENT_BY_AFFINITY }, { TYPE_OMI, TYPE_MCC, 
TargetService::PARENT_BY_AFFINITY }, { TYPE_OMI, TYPE_OCMB_CHIP, TargetService::CHILD_BY_AFFINITY }, + { TYPE_OCMB_CHIP, TYPE_MCC, TargetService::PARENT_BY_AFFINITY }, { TYPE_OCMB_CHIP, TYPE_OMI, TargetService::PARENT_BY_AFFINITY }, { TYPE_OCMB_CHIP, TYPE_MEM_PORT,TargetService::CHILD_BY_AFFINITY }, { TYPE_OCMB_CHIP, TYPE_DIMM, TargetService::CHILD_BY_AFFINITY }, @@ -648,14 +666,30 @@ TargetHandleList getConnAssoc( TargetHandle_t i_target, TYPE i_connType, TargetHandleList o_list; // Default empty list - // Match any class, specified type, and functional. - PredicateCTM predType( CLASS_NA, i_connType ); - PredicateIsFunctional predFunc; - PredicatePostfixExpr predAnd; - predAnd.push(&predType).push(&predFunc).And(); + TYPE trgtType = getTargetType( i_target ); - targetService().getAssociated( o_list, i_target, i_assocType, - TargetService::ALL, &predAnd ); + // OMIC -> OMI and vice versa require special handling. + if ( TYPE_OMIC == trgtType && TYPE_OMI == i_connType ) + { + getChildOmiTargetsByState( o_list, i_target, CLASS_NA, TYPE_OMI, + UTIL_FILTER_FUNCTIONAL ); + } + else if ( TYPE_OMI == trgtType && TYPE_OMIC == i_connType ) + { + getParentOmicTargetsByState( o_list, i_target, CLASS_NA, TYPE_OMIC, + UTIL_FILTER_FUNCTIONAL ); + } + else + { + // Match any class, specified type, and functional. + PredicateCTM predType( CLASS_NA, i_connType ); + PredicateIsFunctional predFunc; + PredicatePostfixExpr predAnd; + predAnd.push(&predType).push(&predFunc).And(); + + targetService().getAssociated( o_list, i_target, i_assocType, + TargetService::ALL, &predAnd ); + } // Sort by target position. 
std::sort( o_list.begin(), o_list.end(), @@ -866,6 +900,17 @@ TargetHandle_t getConnectedChild( TargetHandle_t i_target, TYPE i_connType, (i_connPos == (miPos % MAX_MI_PER_MC)); } ); } + else if ( TYPE_MC == trgtType && TYPE_MCC == i_connType ) + { + // i_connPos is position relative to MC (0-3) + itr = std::find_if( list.begin(), list.end(), + [&](const TargetHandle_t & t) + { + uint32_t mccPos = getTargetPosition(t); + return (trgtPos == (mccPos / MAX_MCC_PER_MC)) && + (i_connPos == (mccPos % MAX_MCC_PER_MC)); + } ); + } else if ( TYPE_MC == trgtType && TYPE_DMI == i_connType ) { // i_connPos is position relative to MC (0-3) @@ -929,6 +974,17 @@ TargetHandle_t getConnectedChild( TargetHandle_t i_target, TYPE i_connType, (i_connPos == (omiPos % MAX_OMI_PER_MCC)); } ); } + else if ( TYPE_MCC == trgtType && TYPE_OCMB_CHIP == i_connType ) + { + // i_connPos is position relative to MCC (0-1) + itr = std::find_if( list.begin(), list.end(), + [&](const TargetHandle_t & t) + { + uint32_t ocmbPos = getTargetPosition(t); + return (trgtPos == (ocmbPos / MAX_OCMB_PER_MCC)) && + (i_connPos == (ocmbPos % MAX_OCMB_PER_MCC)); + } ); + } else if ( TYPE_MC == trgtType && TYPE_OMIC == i_connType ) { // i_connPos is position relative to MC (0-2) @@ -943,13 +999,17 @@ TargetHandle_t getConnectedChild( TargetHandle_t i_target, TYPE i_connType, else if ( TYPE_OMIC == trgtType && TYPE_OMI == i_connType ) { // i_connPos is position relative to OMIC (0-2) - itr = std::find_if( list.begin(), list.end(), - [&](const TargetHandle_t & t) - { - uint32_t omiPos = getTargetPosition(t); - return (trgtPos == (omiPos / MAX_OMI_PER_OMIC)) && - (i_connPos == (omiPos % MAX_OMI_PER_OMIC)); - } ); + for ( TargetHandleList::iterator trgtIt = list.begin(); + trgtIt != list.end(); trgtIt++ ) + { + uint8_t omiPos = 0; + if ( (*trgtIt)->tryGetAttr<ATTR_OMI_DL_GROUP_POS>(omiPos) && + (i_connPos == omiPos) ) + { + itr = trgtIt; + break; + } + } } else if ( TYPE_PROC == trgtType && TYPE_NPU == i_connType ) { @@ 
-991,7 +1051,12 @@ ExtensibleChipList getConnected( ExtensibleChip * i_chip, TYPE i_connType ) TargetHandleList list = getConnected( i_chip->getTrgt(), i_connType ); for ( auto & trgt : list ) { - o_list.push_back( (ExtensibleChip *)systemPtr->GetChip(trgt) ); + // Check to make sure that if we have a non-null Target, we also + // get back a non-null ExtensibleChip. + ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip(trgt); + PRDF_ASSERT( nullptr != chip ); + + o_list.push_back( chip ); } return o_list; @@ -1007,7 +1072,12 @@ ExtensibleChip * getConnectedParent( ExtensibleChip * i_child, TargetHandle_t trgt = getConnectedParent( i_child->getTrgt(), i_parentType ); - return (ExtensibleChip *)systemPtr->GetChip( trgt ); + // Check to make sure that if we have a non-null Target, we also + // get back a non-null ExtensibleChip. + ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip( trgt ); + PRDF_ASSERT( nullptr != chip ); + + return chip; } //------------------------------------------------------------------------------ @@ -1026,6 +1096,10 @@ ExtensibleChip * getConnectedChild( ExtensibleChip * i_parent, if ( nullptr != trgt ) { o_child = (ExtensibleChip *)systemPtr->GetChip( trgt ); + + // Check to make sure that if we have a non-null Target, we also + // get back a non-null ExtensibleChip. 
+ PRDF_ASSERT( nullptr != o_child ); } return o_child; @@ -1471,7 +1545,9 @@ bool isDramWidthX4( TargetHandle_t i_trgt ) bool o_dramWidthX4 = false; PRDF_ASSERT( nullptr != i_trgt ); - //uint8_t dramWidths = 0; + uint8_t dramWidths[MAX_DIMM_PER_PORT]; + uint8_t dimmSlct = 0; + TargetHandle_t memPort = nullptr; switch ( getTargetType(i_trgt) ) { @@ -1485,12 +1561,17 @@ bool isDramWidthX4( TargetHandle_t i_trgt ) break; case TYPE_DIMM: - // TODO RTC 207273 - attribute not in TARGETING code yet - //TargetHandle_t memPort = getConnectedParent(i_trgt, TYPE_MEM_PORT); - //dramWidths = memPort->getAttr<ATTR_MEM_EFF_DRAM_WIDTH>(); - //uint8_t dimmSlct = getDimmSlct( i_trgt ); - //o_dramWidthX4 = - // (fapi2::ENUM_ATTR_MEM_EFF_DRAM_WIDTH_X4 == dramWidths[dimmSlct]); + memPort = getConnectedParent(i_trgt, TYPE_MEM_PORT); + if ( !memPort->tryGetAttr<ATTR_MEM_EFF_DRAM_WIDTH>(dramWidths) ) + { + PRDF_ERR( "isDramWidthX4: Unable to access " + "ATTR_MEM_EFF_DRAM_WIDTH i_trgt=0x%08x.", + getHuid(memPort) ); + PRDF_ASSERT( false ); + } + dimmSlct = getDimmSlct( i_trgt ); + o_dramWidthX4 = + (TARGETING::MEM_EFF_DRAM_WIDTH_X4 == dramWidths[dimmSlct]); break; default: @@ -1538,15 +1619,12 @@ void __getMasterRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, } else if ( MODEL_AXONE == l_procModel ) { - PRDF_ERR( PRDF_FUNC "Axone attribute not supported yet" ); - /* TODO RTC 207273 - no targeting support for attr yet if ( !i_trgt->tryGetAttr<ATTR_MEM_EFF_DIMM_RANKS_CONFIGED>(info[0]) ) { PRDF_ERR( PRDF_FUNC "tryGetAttr<ATTR_MEM_EFF_DIMM_RANKS_CONFIGED> " "failed: i_trgt=0x%08x", getHuid(i_trgt) ); PRDF_ASSERT( false ); // attribute does not exist for target } - */ } else { @@ -1605,17 +1683,21 @@ void getMasterRanks<TYPE_MBA>( TargetHandle_t i_trgt, } template<> -void getMasterRanks<TYPE_MEM_PORT>( TargetHandle_t i_trgt, - std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getMasterRanks<TYPE_MEM_PORT>( i_trgt, o_ranks, 0, i_ds ); +void getMasterRanks<TYPE_OCMB_CHIP>( 
TargetHandle_t i_trgt, + std::vector<MemRank> & o_ranks, + uint8_t i_ds ) +{ + // TODO RTC 210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. + TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + __getMasterRanks<TYPE_MEM_PORT>( memPort, o_ranks, 0, i_ds ); } //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -void __getSlaveRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, +void getSlaveRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, uint8_t i_ds ) { PRDF_ASSERT( nullptr != i_trgt ); @@ -1656,29 +1738,18 @@ void __getSlaveRanks( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, } } -template<> +template void getSlaveRanks<TYPE_MCA>( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getSlaveRanks<TYPE_MCA>( i_trgt, o_ranks, i_ds ); -} - -template<> + uint8_t i_ds ); +template void getSlaveRanks<TYPE_MBA>( TargetHandle_t i_trgt, std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getSlaveRanks<TYPE_MBA>( i_trgt, o_ranks, i_ds ); -} - -template<> -void getSlaveRanks<TYPE_MEM_PORT>( TargetHandle_t i_trgt, - std::vector<MemRank> & o_ranks, - uint8_t i_ds ) -{ - __getSlaveRanks<TYPE_MEM_PORT>( i_trgt, o_ranks, i_ds ); -} + uint8_t i_ds ); +template +void getSlaveRanks<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + std::vector<MemRank> & o_ranks, + uint8_t i_ds ); //------------------------------------------------------------------------------ @@ -1774,12 +1845,15 @@ uint8_t getNumMasterRanksPerDimm<TYPE_MBA>( TargetHandle_t i_trgt, } template<> -uint8_t getNumMasterRanksPerDimm<TYPE_MEM_PORT>( TargetHandle_t i_trgt, - uint8_t i_ds ) -{ - return __getNumMasterRanksPerDimm<TYPE_MEM_PORT>( i_trgt, 0, i_ds ); +uint8_t getNumMasterRanksPerDimm<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + uint8_t i_ds ) +{ + // TODO RTC 
210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. + TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + return __getNumMasterRanksPerDimm<TYPE_MEM_PORT>( memPort, 0, i_ds ); } - //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1822,10 +1896,10 @@ uint8_t __getNumRanksPerDimm( TargetHandle_t i_trgt, } else if ( MODEL_AXONE == l_procModel ) { - ATTR_MEM_EFF_NUM_RANKS_PER_DIMM_type attr; - if ( !i_trgt->tryGetAttr<ATTR_MEM_EFF_NUM_RANKS_PER_DIMM>(attr) ) + ATTR_MEM_EFF_LOGICAL_RANKS_PER_DIMM_type attr; + if ( !i_trgt->tryGetAttr<ATTR_MEM_EFF_LOGICAL_RANKS_PER_DIMM>(attr) ) { - PRDF_ERR( PRDF_FUNC "tryGetAttr<ATTR_MEM_EFF_NUM_RANKS_PER_DIMM> " + PRDF_ERR( PRDF_FUNC "tryGetAttr<ATTR_MEM_EFF_LOGICAL_RANKS_PER_DIMM> " "failed: i_trgt=0x%08x", getHuid(i_trgt) ); PRDF_ASSERT( false ); // attribute does not exist for target } @@ -1869,9 +1943,13 @@ uint8_t getNumRanksPerDimm<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_ds ) } template<> -uint8_t getNumRanksPerDimm<TYPE_MEM_PORT>( TargetHandle_t i_trgt, uint8_t i_ds ) +uint8_t getNumRanksPerDimm<TYPE_OCMB_CHIP>(TargetHandle_t i_trgt, uint8_t i_ds) { - return __getNumRanksPerDimm<TYPE_MEM_PORT>( i_trgt, 0, i_ds ); + // TODO RTC 210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. 
+ TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + return __getNumRanksPerDimm<TYPE_MEM_PORT>( memPort, 0, i_ds ); } //############################################################################## diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.H b/src/usr/diag/prdf/common/plat/prdfTargetServices.H index 8793e8c61..34af865d7 100755 --- a/src/usr/diag/prdf/common/plat/prdfTargetServices.H +++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.H @@ -42,6 +42,10 @@ #include <targeting/common/target.H> #include <prdfParserEnums.H> +#ifdef __HOSTBOOT_MODULE + #include <chipids.H> +#endif + //------------------------------------------------------------------------------ namespace PRDF @@ -145,6 +149,20 @@ TARGETING::CLASS getTargetClass( TARGETING::TargetHandle_t i_target ); */ TARGETING::MODEL getChipModel( TARGETING::TargetHandle_t i_trgt ); +#ifdef __HOSTBOOT_MODULE + +// NOTE: This should be used instead of getChipModel() because of the case of +// MODEL_OCMB, where we need the chip ID to distinguish between Explorer +// and Gemini. + +/** + * @param i_trgt A chip target or any unit target within the chip. + * @return The chip ID. + */ +uint32_t getChipId( TARGETING::TargetHandle_t i_trgt ); + +#endif + /** * @param i_trgt A chip target or any unit target within the chip. * @return The level (EC level) of a chip. @@ -293,7 +311,7 @@ TARGETING::TargetHandle_t getConnectedPeerTarget( TARGETING::TargetHandle_t i_tgt); /** - * @param i_trgt The target MBA, MCA, or MEM_PORT. + * @param i_trgt The target MBA, MCA, OCMB_CHIP, or MEM_PORT. * @param i_rank The target rank. * @return A list of DIMMs connected to the target and rank. */ @@ -301,10 +319,10 @@ TARGETING::TargetHandleList getConnectedDimms( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank ); /** - * @param i_trgt The target MBA, MCA, or MEM_PORT. + * @param i_trgt The target MBA, MCA, OCMB_CHIP, or MEM_PORT. * @param i_rank The target rank. 
- * @param i_port Port select, only needed for MBA. MCA and MEM_PORT are - * targets equivalent to the port already. + * @param i_port Port select, only needed for MBA and OCMB_CHIP. MCA and + * MEM_PORT are targets equivalent to the port already. * @return The DIMM connected to the target and rank on a port. */ TARGETING::TargetHandle_t getConnectedDimm( TARGETING::TargetHandle_t i_trgt, @@ -434,7 +452,7 @@ uint8_t getColNumConfig( TARGETING::TargetHandle_t i_trgt ); /** * @brief Returns a sorted list of configured master ranks for an MCA or MBA. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param o_ranks The returned list. * @param i_ds When used, this function will only return the list of ranks * for the target DIMM select. Otherwise, the default is to @@ -450,7 +468,7 @@ void getMasterRanks( TARGETING::TargetHandle_t i_trgt, /** * @brief Returns a sorted list of configured slave ranks for an MCA or MBA. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param o_ranks The returned list. * @param i_ds When used, this function will only return the list of ranks * for the target DIMM select. Otherwise, the default is to @@ -466,7 +484,7 @@ void getSlaveRanks( TARGETING::TargetHandle_t i_trgt, /** * @brief Obtains the number of master ranks per DIMM select. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param i_ds DIMM select. * @return Total number of master ranks configured per DIMM select. */ @@ -477,7 +495,7 @@ uint8_t getNumMasterRanksPerDimm( TARGETING::TargetHandle_t i_trgt, /** * @brief Obtains the total number of ranks (including slave ranks) per DIMM * select. - * @param i_trgt MCA, MBA, or MEM_PORT target. + * @param i_trgt MCA, MBA, or OCMB_CHIP target. * @param i_ds DIMM select. * @return Total number of ranks configured per DIMM select. 
*/ diff --git a/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C b/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C index c6cd47d0b..08aa11600 100644 --- a/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C +++ b/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C @@ -237,6 +237,18 @@ void getTargetInfo( HUID i_chipId, TARGETING::TYPE & o_targetType, l_node, l_chip, l_chiplet ); break; + case TYPE_OCMB_CHIP: + snprintf( o_chipName, i_sz_chipName, "ocmb(n%dp%d)", + l_node, l_chip ); + break; + + case TYPE_MEM_PORT: + l_chip = l_chip / MAX_PORT_PER_OCMB; + l_chiplet = l_chiplet % MAX_PORT_PER_OCMB; + snprintf( o_chipName, i_sz_chipName, "memport(n%dp%dc%d)", + l_node, l_chip, l_chiplet ); + break; + case TYPE_MCS: l_chip = l_chip / MAX_MCS_PER_PROC; l_chiplet = l_chiplet % MAX_MCS_PER_PROC; @@ -286,6 +298,13 @@ void getTargetInfo( HUID i_chipId, TARGETING::TYPE & o_targetType, l_node, l_chip, l_chiplet ); break; + case TYPE_OMI: + l_chip = l_chip / MAX_OMI_PER_PROC; + l_chiplet = l_chiplet % MAX_OMI_PER_PROC; + snprintf( o_chipName, i_sz_chipName, "omi(n%dp%dc%d)", + l_node, l_chip, l_chiplet ); + break; + case TYPE_MEMBUF: snprintf( o_chipName, i_sz_chipName, "mb(n%dp%d)", l_node, l_chip ); diff --git a/src/usr/diag/prdf/common/plugins/prdfMemLogParse.C b/src/usr/diag/prdf/common/plugins/prdfMemLogParse.C index 390178f6e..1518319d1 100644 --- a/src/usr/diag/prdf/common/plugins/prdfMemLogParse.C +++ b/src/usr/diag/prdf/common/plugins/prdfMemLogParse.C @@ -2848,17 +2848,22 @@ void initMemMruStrings( MemoryMruData::MemMruMeld i_mm, bool & o_addDramSite, memset( o_header, '\0', HEADER_SIZE ); memset( o_data, '\0', DATA_SIZE ); - // Get the position info (default MCA). - const char * compStr = "mca"; + // Get the position info (default MBA). 
+ + const char * compStr = "mba"; uint8_t nodePos = i_mm.s.nodePos; - uint8_t chipPos = i_mm.s.procPos; - uint8_t compPos = i_mm.s.chnlPos; + uint8_t chipPos = (i_mm.s.procPos << 3) | i_mm.s.chnlPos; + uint8_t compPos = i_mm.s.mbaPos; - if ( !i_mm.s.isMca ) // MBA + if ( i_mm.s.isMca ) // MCA + { + compStr = "mca"; + chipPos = i_mm.s.procPos; + compPos = i_mm.s.chnlPos; + } + else if ( i_mm.s.isOcmb ) // OCMB { - compStr = "mba"; - chipPos = (i_mm.s.procPos << 3) | i_mm.s.chnlPos; - compPos = i_mm.s.mbaPos; + compStr = "ocmb"; } // Build the header string. @@ -2953,13 +2958,13 @@ void addDramSiteString( const MemoryMruData::ExtendedData & i_extMemMru, } } } - else // IS DIMMs + else // Dram site locations not supported { // Add DQ info. char tmp[DATA_SIZE] = { '\0' }; strcat( io_data, "DQ:" ); - if ( mm.s.isMca ) // MCA + if ( mm.s.isMca || mm.s.isOcmb ) // MCA, OCMB { // There is only one DQ per symbol. snprintf( tmp, DATA_SIZE, "%d", i_extMemMru.dqMapping[dqIdx] ); diff --git a/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H b/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H index a9a4498d3..f2fdaff26 100644 --- a/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H +++ b/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2017 */ +/* Contributors Listed Below - COPYRIGHT 2013,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -88,6 +88,10 @@ union MemMruMeld // version field so that the error log parser know which format to // used. + // NOTE: For OCMBs, specified by the isOcmb field, chnlPos will specify + // the MCC position within the proc and mbaPos will specify the + // OMI position within the channel. 
+ #if !( __BYTE_ORDER == __LITTLE_ENDIAN ) uint32_t valid : 1; ///< Used to indicate nothing failed while @@ -106,13 +110,15 @@ union MemMruMeld uint32_t eccSpared : 1; ///< True if symbol is on ECC DRAM uint32_t srank : 3; ///< Slave rank (0-7) // If isMca is specified, then chnlPos above will specify the MCA pos - // and the mbaPos field will be unused + // and the mbaPos field will be unused. See above note for OCMB usage. uint32_t isMca : 1; ///< True if MCA is used as opposed to MBA - uint32_t unused : 3; ///< 3 Bits currently unused + uint32_t isOcmb : 1; ///< True if OCMB is used + uint32_t unused : 2; ///< 2 Bits currently unused #else // Need to reverse this to make the uint32_t look right in the // simulator. - uint32_t unused : 3; + uint32_t unused : 2; + uint32_t isOcmb : 1; uint32_t isMca : 1; uint32_t srank : 3; uint32_t eccSpared : 1; diff --git a/src/usr/diag/prdf/common/plugins/prdfParserEnums.H b/src/usr/diag/prdf/common/plugins/prdfParserEnums.H index af346e57b..1001e185f 100644 --- a/src/usr/diag/prdf/common/plugins/prdfParserEnums.H +++ b/src/usr/diag/prdf/common/plugins/prdfParserEnums.H @@ -109,8 +109,14 @@ enum PositionBounds MAX_OMI_PER_MCC = 2, MAX_OMI_PER_OMIC = 3, + MAX_OMI_PER_MC = 8, + MAX_OMI_PER_PROC = MAX_OMI_PER_MC * MAX_MC_PER_PROC, MAX_OCMB_PER_OMI = 1, + MAX_OCMB_PER_MCC = MAX_OCMB_PER_OMI * MAX_OMI_PER_MCC, + + // TODO RTC 210072 - Support multiple ports + MAX_PORT_PER_OCMB = 1, MAX_SUB_PORT = 2, diff --git a/src/usr/diag/prdf/common/plugins/prdfParserUtils.C b/src/usr/diag/prdf/common/plugins/prdfParserUtils.C index 9d2233e75..2f9bdb458 100644 --- a/src/usr/diag/prdf/common/plugins/prdfParserUtils.C +++ b/src/usr/diag/prdf/common/plugins/prdfParserUtils.C @@ -87,9 +87,9 @@ uint8_t symbol2Dq<TARGETING::TYPE_MCA>( uint8_t i_symbol ) //------------------------------------------------------------------------------ template<> -uint8_t symbol2Dq<TARGETING::TYPE_MEM_PORT>( uint8_t i_symbol ) +uint8_t 
symbol2Dq<TARGETING::TYPE_OCMB_CHIP>( uint8_t i_symbol ) { - // MEM_PORT case is identical to MCA + // OCMB_CHIP case is identical to MCA return symbol2Dq<TARGETING::TYPE_MCA>(i_symbol); } @@ -122,10 +122,12 @@ uint8_t symbol2PortSlct<TARGETING::TYPE_MCA>( uint8_t i_symbol ) //------------------------------------------------------------------------------ template<> -uint8_t symbol2PortSlct<TARGETING::TYPE_MEM_PORT>( uint8_t i_symbol ) +uint8_t symbol2PortSlct<TARGETING::TYPE_OCMB_CHIP>( uint8_t i_symbol ) { - // Port select does not exist on MEM_PORT. Always return 0 so that code will - // continue to work. + // TODO RTC 210072 - Explorer only has one port, as such we can just + // return 0. However, multiple ports will be supported in the future, + // We'll need to figure out how to convert the symbol to a port select for + // OCMB at that time. return 0; } @@ -149,8 +151,8 @@ uint8_t dq2Symbol<TARGETING::TYPE_MBA>( uint8_t i_dq, uint8_t i_ps ) //------------------------------------------------------------------------------ -template<> -uint8_t dq2Symbol<TARGETING::TYPE_MCA>( uint8_t i_dq, uint8_t i_ps ) +template<TARGETING::TYPE T> +uint8_t dq2Symbol( uint8_t i_dq, uint8_t i_ps ) { uint8_t symbol = SYMBOLS_PER_RANK; @@ -175,14 +177,12 @@ uint8_t dq2Symbol<TARGETING::TYPE_MCA>( uint8_t i_dq, uint8_t i_ps ) return symbol; } -//------------------------------------------------------------------------------ - -template<> -uint8_t dq2Symbol<TARGETING::TYPE_MEM_PORT>( uint8_t i_dq, uint8_t i_ps ) -{ - // MEM_PORT case is identical to MCA - return dq2Symbol<TARGETING::TYPE_MCA>( i_dq, i_ps ); -} +template +uint8_t dq2Symbol<TARGETING::TYPE_MCA>( uint8_t i_dq, uint8_t i_ps ); +template +uint8_t dq2Symbol<TARGETING::TYPE_MEM_PORT>( uint8_t i_dq, uint8_t i_ps ); +template +uint8_t dq2Symbol<TARGETING::TYPE_OCMB_CHIP>( uint8_t i_dq, uint8_t i_ps ); //------------------------------------------------------------------------------ @@ -218,9 +218,9 @@ uint8_t 
nibble2Symbol<TARGETING::TYPE_MCA>( uint8_t i_x4Dram ) //------------------------------------------------------------------------------ template<> -uint8_t nibble2Symbol<TARGETING::TYPE_MEM_PORT>( uint8_t i_x4Dram ) +uint8_t nibble2Symbol<TARGETING::TYPE_OCMB_CHIP>( uint8_t i_x4Dram ) { - // MEM_PORT case is identical to MCA + // OCMB_CHIP case is identical to MCA return nibble2Symbol<TARGETING::TYPE_MCA>(i_x4Dram); } @@ -258,9 +258,9 @@ uint8_t byte2Symbol<TARGETING::TYPE_MCA>( uint8_t i_x8Dram ) //------------------------------------------------------------------------------ template<> -uint8_t byte2Symbol<TARGETING::TYPE_MEM_PORT>( uint8_t i_x8Dram ) +uint8_t byte2Symbol<TARGETING::TYPE_OCMB_CHIP>( uint8_t i_x8Dram ) { - // MEM_PORT case is identical to MCA + // OCMB_CHIP case is identical to MCA return byte2Symbol<TARGETING::TYPE_MCA>(i_x8Dram); } @@ -286,9 +286,9 @@ uint8_t symbol2Nibble<TARGETING::TYPE_MCA>( uint8_t i_symbol ) //------------------------------------------------------------------------------ template<> -uint8_t symbol2Nibble<TARGETING::TYPE_MEM_PORT>( uint8_t i_symbol ) +uint8_t symbol2Nibble<TARGETING::TYPE_OCMB_CHIP>( uint8_t i_symbol ) { - // MEM_PORT case is identical to MCA + // OCMB_CHIP case is identical to MCA return symbol2Nibble<TARGETING::TYPE_MCA>(i_symbol); } @@ -314,9 +314,9 @@ uint8_t symbol2Byte<TARGETING::TYPE_MCA>( uint8_t i_symbol ) //------------------------------------------------------------------------------ template<> -uint8_t symbol2Byte<TARGETING::TYPE_MEM_PORT>( uint8_t i_symbol ) +uint8_t symbol2Byte<TARGETING::TYPE_OCMB_CHIP>( uint8_t i_symbol ) { - // MEM_PORT case is identical to MCA + // OCMB_CHIP case is identical to MCA return symbol2Byte<TARGETING::TYPE_MCA>(i_symbol); } diff --git a/src/usr/diag/prdf/common/prdfMain_common.C b/src/usr/diag/prdf/common/prdfMain_common.C index fd23cf4d1..09cfe2212 100755 --- a/src/usr/diag/prdf/common/prdfMain_common.C +++ b/src/usr/diag/prdf/common/prdfMain_common.C @@ -5,7 
+5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2018 */ +/* Contributors Listed Below - COPYRIGHT 2013,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -47,6 +47,7 @@ #ifdef __HOSTBOOT_RUNTIME #include <prdfP9McbistDomain.H> +#include <prdfP9OcmbChipDomain.H> #include <prdfCenMbaDomain.H> #endif @@ -172,6 +173,10 @@ errlHndl_t noLock_initialize() { ((MbaDomain *)systemPtr->GetDomain(MBA_DOMAIN))->handleRrFo(); } + else if ( MODEL_AXONE == procModel ) + { + ((OcmbChipDomain *)systemPtr->GetDomain(OCMB_DOMAIN))->handleRrFo(); + } else { PRDF_ERR( PRDF_FUNC "Master PROC model %d not supported", procModel ); diff --git a/src/usr/diag/prdf/common/util/iipbits.h b/src/usr/diag/prdf/common/util/iipbits.h deleted file mode 100755 index 7b02e52f3..000000000 --- a/src/usr/diag/prdf/common/util/iipbits.h +++ /dev/null @@ -1,24 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/common/util/iipbits.h $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* COPYRIGHT International Business Machines Corp. 1993,2014 */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. 
*/ -/* */ -/* IBM_PROLOG_END_TAG */ - -#include<prdfBitString.H> diff --git a/src/usr/diag/prdf/framework/prdfFileRegisterAccess.C b/src/usr/diag/prdf/framework/prdfFileRegisterAccess.C index dfdaabf9c..ed5d3ec0d 100755 --- a/src/usr/diag/prdf/framework/prdfFileRegisterAccess.C +++ b/src/usr/diag/prdf/framework/prdfFileRegisterAccess.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -37,7 +37,7 @@ uint32_t FileScomAccessor::Access( TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const + RegisterAccess::Operation operation) const { #define PRDF_FUNC "[FileScomAccessor::Access()] " @@ -48,13 +48,13 @@ uint32_t FileScomAccessor::Access( switch (operation) { - case MopRegisterAccess::WRITE: + case RegisterAccess::WRITE: // TODO: RTC 62076 move BitString class to 64-bit data = (((uint64_t)bs.getFieldJustify( 0, 32)) << 32) | ((uint64_t)bs.getFieldJustify(32, 32)); firData.putScom( i_target, registerId, data); break; - case MopRegisterAccess::READ: + case RegisterAccess::READ: firData.getScom( i_target, registerId, data); // TODO: RTC 62076 move BitString class to 64-bit bs.setFieldJustify( 0, 32, data >> 32); diff --git a/src/usr/diag/prdf/framework/prdfFileRegisterAccess.H b/src/usr/diag/prdf/framework/prdfFileRegisterAccess.H index 61d255e8a..84b749a58 100755 --- a/src/usr/diag/prdf/framework/prdfFileRegisterAccess.H +++ b/src/usr/diag/prdf/framework/prdfFileRegisterAccess.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -71,7 +71,7 @@ class FileScomAccessor : public ScomAccessor virtual uint32_t Access(TARGETING::TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const; + RegisterAccess::Operation operation) const; }; } // End namespace PRDF diff --git a/src/usr/diag/prdf/makefile b/src/usr/diag/prdf/makefile index ed8b7c1ce..8fda714ed 100755 --- a/src/usr/diag/prdf/makefile +++ b/src/usr/diag/prdf/makefile @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2012,2018 +# Contributors Listed Below - COPYRIGHT 2012,2019 # [+] International Business Machines Corp. # # @@ -39,13 +39,16 @@ include prdf_hb_only.mk # Will define PRD_SRC_PATH and PRD_INC_PATH include common/prdf_common_fsp_and_hb.mk include common/framework/prdf_framework.mk include common/plat/p9/prdf_plat_p9.mk +include common/plat/axone/prdf_plat_axone.mk include common/plat/cen/prdf_plat_cen.mk include common/plat/mem/prdf_plat_mem.mk include common/plat/centaur/prdf_plat_centaur.mk include common/plat/cumulus/prdf_plat_cumulus.mk include common/plat/nimbus/prdf_plat_nimbus.mk +include common/plat/explorer/prdf_plat_explorer.mk include plat/cen/prdf_plat_cen_hb_only.mk include plat/mem/prdf_plat_mem_hb_only.mk +include plat/explorer/prdf_plat_explorer_hb_only.mk include plat/p9/prdf_plat_p9_hb_only.mk VPATH += ${prd_vpath} diff --git a/src/usr/diag/prdf/occ_firdata/prdfWriteHomerFirData.C b/src/usr/diag/prdf/occ_firdata/prdfWriteHomerFirData.C index d3b8a72ee..2e9b6f963 100644 --- a/src/usr/diag/prdf/occ_firdata/prdfWriteHomerFirData.C +++ b/src/usr/diag/prdf/occ_firdata/prdfWriteHomerFirData.C @@ -621,28 +621,11 @@ void getAddresses( TrgtMap_t & io_targMap ) 0x07013340, // OMIDLFIR }; - io_targMap[TRGT_OCMB][REG_GLBL] = + io_targMap[TRGT_OMIC][REG_REG] = { - 0x08040000, // MB_CHIPLET_CS_FIR - 0x08040001, // MB_CHIPLET_RE_FIR - 0x08040004, // MB_CHIPLET_SPA_FIR - }; - - io_targMap[TRGT_OCMB][REG_FIR] = - { - 
0x0804000a, // MB_LFIR - 0x08010870, // MMIOFIR - 0x08011400, // SRQFIR - 0x08011800, // MCBISTFIR - 0x08011c00, // RDFFIR - 0x08012400, // TLXFIR - 0x08012800, // OMIDLFIR - }; - - io_targMap[TRGT_OCMB][REG_REG] = - { - 0x08040002, // MB_CHIPLET_FIR_MASK - 0x08040007, // MB_CHIPLET_SPA_FIR_MASK + 0x07013353, // DL0_ERROR_HOLD + 0x07013363, // DL1_ERROR_HOLD + 0x07013373, // DL2_ERROR_HOLD }; // EC level handling will be done with a @@ -721,15 +704,22 @@ void __initChipInfo( TargetHandle_t i_chip, HOMER_ChipType_t i_chipModel, uint32_t chipPos = getTargetPosition( i_chip ); PRDF_ASSERT( chipPos < i_maxChipsPerNode ); - // Get the chip FSI address. - FSI::FsiLinkInfo_t fsiInfo; - FSI::getFsiLinkInfo( i_chip, fsiInfo ); - // Fill in the HOMER chip info. o_chipInfo.hChipType = HOMER_getChip( i_chipModel ); o_chipInfo.hChipType.chipPos = chipPos; - o_chipInfo.hChipType.fsiBaseAddr = fsiInfo.baseAddr; o_chipInfo.hChipType.chipEcLevel = i_chip->getAttr<ATTR_EC>(); + + if( HOMER_CHIP_EXPLORER == i_chipModel ) + { + //@todo - RTC:201781 - Add i2c information + } + else + { + // Get the chip FSI address. + FSI::FsiLinkInfo_t fsiInfo; + FSI::getFsiLinkInfo( i_chip, fsiInfo ); + o_chipInfo.hChipType.fsiBaseAddr = fsiInfo.baseAddr; + } } // Returns a right justified config mask of the unit @@ -942,20 +932,26 @@ errlHndl_t getHwConfig( std::vector<HOMER_ChipInfo_t> & o_chipInfVector, // Iterate all of the OCMB chips. for ( auto & ocmb : getFunctionalTargetList(TYPE_OCMB_CHIP) ) { - // Get the chip model type. - HOMER_ChipType_t modelType = HOMER_CHIP_INVALID; - switch ( getChipModel(ocmb) ) + // Get the OCMB chip type. + HOMER_ChipType_t ocmbType = HOMER_CHIP_INVALID; + switch ( getChipId(ocmb) ) { - case MODEL_EXPLORER: modelType = HOMER_CHIP_EXPLORER; break; + case POWER_CHIPID::GEMINI_16: + // Skip Gemini OCMBs. They can exist, but PRD won't support + // them (set invalid). 
+ ocmbType = HOMER_CHIP_INVALID; break; + case POWER_CHIPID::EXPLORER_16: + ocmbType = HOMER_CHIP_EXPLORER; break; default: - PRDF_ERR( FUNC "Unsupported chip model %d on 0x%08x", - modelType, getHuid(ocmb) ); + PRDF_ERR( FUNC "Unsupported chip ID 0x%08x on 0x%08x", + getChipId(ocmb), getHuid(ocmb) ); PRDF_ASSERT( false ); } + if ( HOMER_CHIP_INVALID == ocmbType ) continue; // Init the chip info. HOMER_ChipInfo_t ci; - __initChipInfo( ocmb, modelType, MAX_OCMB_PER_NODE, ci ); + __initChipInfo( ocmb, ocmbType, MAX_OCMB_PER_NODE, ci ); // NOTE: Explorer does not have any unit data. diff --git a/src/usr/diag/prdf/plat/explorer/prdfExplorerPlugins.C b/src/usr/diag/prdf/plat/explorer/prdfExplorerPlugins.C new file mode 100644 index 000000000..4a8dba1a2 --- /dev/null +++ b/src/usr/diag/prdf/plat/explorer/prdfExplorerPlugins.C @@ -0,0 +1,89 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/explorer/prdfExplorerPlugins.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfPluginMap.H> + +// Platform includes +#include <prdfMemDbUtils.H> +#include <prdfMemEccAnalysis.H> +//#include <prdfOcmbDataBundle.H> +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +namespace explorer_ocmb +{ + +//############################################################################## +// +// MCBISTFIR +// +//############################################################################## + +/** + * @brief MCBISTFIR[10] - MCBIST Command Complete. + * @param i_chip An OCMB chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t McbistCmdComplete( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[explorer_ocmb::McbistCmdComplete] " + + return SUCCESS; + + // Tell the TD controller there was a command complete attention. + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + if ( SUCCESS != db->getTdCtlr()->handleCmdComplete(io_sc) ) + { + // Something failed. It is possible the command complete attention has + // not been cleared. Make the rule code do it. + return SUCCESS; + } + else + { + // Everything was successful. Whether we started a new command or told + // MDIA to do it, the command complete bit has already been cleared. + // Don't do it again. + return PRD_NO_CLEAR_FIR_BITS; + } + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( explorer_ocmb, McbistCmdComplete ); + +} // end namespace explorer_ocmb + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/plat/explorer/prdf_plat_explorer_hb_only.mk b/src/usr/diag/prdf/plat/explorer/prdf_plat_explorer_hb_only.mk new file mode 100644 index 000000000..ee1464d3e --- /dev/null +++ b/src/usr/diag/prdf/plat/explorer/prdf_plat_explorer_hb_only.mk @@ -0,0 +1,42 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. 
+# +# $Source: src/usr/diag/prdf/plat/explorer/prdf_plat_explorer_hb_only.mk $ +# +# OpenPOWER HostBoot Project +# +# Contributors Listed Below - COPYRIGHT 2019 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# IBM_PROLOG_END_TAG + +# NOTE: PRD_SRC_PATH and PRD_INC_PATH must be defined before including this file + +################################################################################ +# Paths common to both IPL and runtime +################################################################################ + +prd_vpath += ${PRD_SRC_PATH}/plat/explorer + +prd_incpath += ${PRD_SRC_PATH}/plat/explorer + +################################################################################ +# Hostboot only object files common to both IPL and runtime +################################################################################ + +# plat/mem/ (rule plugin related) +prd_rule_plugin += prdfExplorerPlugins.o + diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDsd.H b/src/usr/diag/prdf/plat/mem/prdfMemDsd.H index 5990a902e..063e92775 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemDsd.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemDsd.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -47,7 +47,7 @@ class DsdEvent : public TdEntry /** * @brief Constructor - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_rank Rank reporting chip mark. */ DsdEvent<T>( ExtensibleChip * i_chip, const MemRank & i_rank, diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C index 70a6be7f2..9dbaeeb3c 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -30,6 +30,8 @@ #include <prdfMemDqBitmap.H> #include <prdfMemDsd.H> +#include <hwp_wrappers.H> + using namespace TARGETING; namespace PRDF @@ -37,18 +39,12 @@ namespace PRDF using namespace PlatServices; -//############################################################################## -// -// Specializations for MBA -// -//############################################################################## - -template<> -uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, - STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<TARGETING::TYPE T> +uint32_t DsdEvent<T>::checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[DsdEvent<TYPE_MBA>::checkEcc] " + #define PRDF_FUNC "[DsdEvent<T>::checkEcc] " uint32_t o_rc = SUCCESS; @@ -71,7 +67,7 @@ uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, // At this point we don't actually have an address for the UE. The // best we can do is get the address in which the command stopped. 
MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MBA>( iv_chip, addr ); + o_rc = getMemMaintAddr<T>( iv_chip, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -79,8 +75,8 @@ uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, break; } - o_rc = MemEcc::handleMemUe<TYPE_MBA>( iv_chip, addr, - UE_TABLE::SCRUB_UE, io_sc ); + o_rc = MemEcc::handleMemUe<T>( iv_chip, addr, + UE_TABLE::SCRUB_UE, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed", @@ -101,12 +97,12 @@ uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, //------------------------------------------------------------------------------ -template<> -uint32_t DsdEvent<TYPE_MBA>::verifySpare( const uint32_t & i_eccAttns, - STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<TARGETING::TYPE T> +uint32_t DsdEvent<T>::verifySpare( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[DsdEvent<TYPE_MBA>::verifySpare] " + #define PRDF_FUNC "[DsdEvent<T>::verifySpare] " uint32_t o_rc = SUCCESS; @@ -166,7 +162,7 @@ uint32_t DsdEvent<TYPE_MBA>::verifySpare( const uint32_t & i_eccAttns, PRDFSIG_DsdDramSpared ); // Remove the chip mark. 
- o_rc = MarkStore::clearChipMark<TYPE_MBA>( iv_chip, iv_rank ); + o_rc = MarkStore::clearChipMark<T>( iv_chip, iv_rank ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "clearChipMark(0x%08x,0x%02x) failed", @@ -190,7 +186,7 @@ uint32_t DsdEvent<TYPE_MBA>::verifySpare( const uint32_t & i_eccAttns, template<> uint32_t DsdEvent<TYPE_MBA>::startCmd() { - #define PRDF_FUNC "[DsdEvent::startCmd] " + #define PRDF_FUNC "[DsdEvent<TYPE_MBA>::startCmd] " uint32_t o_rc = SUCCESS; @@ -231,7 +227,54 @@ uint32_t DsdEvent<TYPE_MBA>::startCmd() //------------------------------------------------------------------------------ template<> -uint32_t DsdEvent<TYPE_MBA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) +uint32_t DsdEvent<TYPE_OCMB_CHIP>::startCmd() +{ + #define PRDF_FUNC "[DsdEvent<TYPE_OCMB_CHIP>::startCmd] " + + uint32_t o_rc = SUCCESS; + + #ifdef CONFIG_AXONE + + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; + + switch ( iv_phase ) + { + case TD_PHASE_1: + // Start the steer cleanup procedure on this master rank. + o_rc = startTdSteerCleanup<TYPE_OCMB_CHIP>( iv_chip, iv_rank, + MASTER_RANK, stopCond ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdSteerCleanup(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } + break; + + case TD_PHASE_2: + // Start the superfast read procedure on this master rank. 
+ o_rc = startTdSfRead<TYPE_OCMB_CHIP>( iv_chip, iv_rank, MASTER_RANK, + stopCond ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdSfRead(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } + break; + + default: PRDF_ASSERT( false ); // invalid phase + } + + #endif + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +uint32_t DsdEvent<T>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) { uint32_t signature = 0; @@ -260,5 +303,9 @@ uint32_t DsdEvent<TYPE_MBA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) //------------------------------------------------------------------------------ +// Avoid linker errors with the template. +template class DsdEvent<TYPE_MBA>; +template class DsdEvent<TYPE_OCMB_CHIP>; + } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C index 42b7eb9fc..1478a666d 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -29,6 +29,8 @@ #include <prdfCenMbaExtraSig.H> #include <prdfMemDsd.H> +#include <hwp_wrappers.H> + using namespace TARGETING; namespace PRDF @@ -36,18 +38,12 @@ namespace PRDF using namespace PlatServices; -//############################################################################## -// -// Specializations for MBA -// -//############################################################################## - -template<> -uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, - STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<TARGETING::TYPE T> +uint32_t DsdEvent<T>::checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[DsdEvent<TYPE_MBA>::checkEcc] " + #define PRDF_FUNC "[DsdEvent<T>::checkEcc] " uint32_t o_rc = SUCCESS; @@ -64,7 +60,7 @@ uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, // At this point we don't actually have an address for the UE. The // best we can do is get the address in which the command stopped. MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MBA>( iv_chip, addr ); + o_rc = getMemMaintAddr<T>( iv_chip, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -72,8 +68,8 @@ uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, break; } - o_rc = MemEcc::handleMemUe<TYPE_MBA>( iv_chip, addr, - UE_TABLE::SCRUB_UE, io_sc ); + o_rc = MemEcc::handleMemUe<T>( iv_chip, addr, + UE_TABLE::SCRUB_UE, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed", @@ -83,7 +79,7 @@ uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, // Because of the UE, any further TPS requests will likely have no // effect. So ban all subsequent requests. - MemDbUtils::banTps<TYPE_MBA>( iv_chip, addr.getRank() ); + MemDbUtils::banTps<T>( iv_chip, addr.getRank() ); // Leave the mark in place and abort this procedure. 
o_done = true; break; @@ -114,12 +110,12 @@ uint32_t DsdEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns, //------------------------------------------------------------------------------ -template<> -uint32_t DsdEvent<TYPE_MBA>::verifySpare( const uint32_t & i_eccAttns, - STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<TARGETING::TYPE T> +uint32_t DsdEvent<T>::verifySpare( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[DsdEvent<TYPE_MBA>::verifySpare] " + #define PRDF_FUNC "[DsdEvent<T>::verifySpare] " uint32_t o_rc = SUCCESS; @@ -134,7 +130,7 @@ uint32_t DsdEvent<TYPE_MBA>::verifySpare( const uint32_t & i_eccAttns, // error (i.e. a UE). bool lastAddr = false; - o_rc = didCmdStopOnLastAddr<TYPE_MBA>( iv_chip, MASTER_RANK, lastAddr ); + o_rc = didCmdStopOnLastAddr<T>( iv_chip, MASTER_RANK, lastAddr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "didCmdStopOnLastAddr(0x%08x) failed", @@ -155,7 +151,7 @@ uint32_t DsdEvent<TYPE_MBA>::verifySpare( const uint32_t & i_eccAttns, io_sc.service_data->setSignature( iv_chip->getHuid(), PRDFSIG_DsdDramSpared ); // Remove the chip mark. 
- o_rc = MarkStore::clearChipMark<TYPE_MBA>( iv_chip, iv_rank ); + o_rc = MarkStore::clearChipMark<T>( iv_chip, iv_rank ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "clearChipMark(0x%08x,0x%02x) failed", @@ -179,7 +175,7 @@ uint32_t DsdEvent<TYPE_MBA>::verifySpare( const uint32_t & i_eccAttns, template<> uint32_t DsdEvent<TYPE_MBA>::startCmd() { - #define PRDF_FUNC "[DsdEvent::startCmd] " + #define PRDF_FUNC "[DsdEvent<TYPE_MBA>::startCmd] " uint32_t o_rc = SUCCESS; @@ -224,7 +220,38 @@ uint32_t DsdEvent<TYPE_MBA>::startCmd() //------------------------------------------------------------------------------ template<> -uint32_t DsdEvent<TYPE_MBA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) +uint32_t DsdEvent<TYPE_OCMB_CHIP>::startCmd() +{ + #define PRDF_FUNC "[DsdEvent<TYPE_OCMB_CHIP>::startCmd] " + + uint32_t o_rc = SUCCESS; + + #ifdef CONFIG_AXONE + + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; + + stopCond.set_pause_on_ue(mss::ON); + + // Start the time based scrub procedure on this master rank. + o_rc = startTdScrub<TYPE_OCMB_CHIP>( iv_chip, iv_rank, MASTER_RANK, + stopCond ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } + + #endif + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +uint32_t DsdEvent<T>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) { uint32_t signature = 0; @@ -258,5 +285,9 @@ uint32_t DsdEvent<TYPE_MBA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) //------------------------------------------------------------------------------ +// Avoid linker errors with the template. 
+template class DsdEvent<TYPE_MBA>; +template class DsdEvent<TYPE_OCMB_CHIP>; + } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C b/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C index 41b0de3ea..40653ee09 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2017,2018 */ +/* Contributors Listed Below - COPYRIGHT 2017,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -64,7 +64,7 @@ bool isEnabled() !isMfgAvpEnabled() && !isMfgHdatAvpEnabled() ); } -int32_t __getAddrConfig( ExtensibleChip * i_mcaChip, uint8_t i_dslct, +int32_t __getAddrConfig( ExtensibleChip * i_chip, uint8_t i_dslct, bool & o_twoDimmConfig, uint8_t & o_mrnkBits, uint8_t & o_srnkBits, uint8_t & o_extraRowBits ) { @@ -72,12 +72,12 @@ int32_t __getAddrConfig( ExtensibleChip * i_mcaChip, uint8_t i_dslct, int32_t o_rc = SUCCESS; - SCAN_COMM_REGISTER_CLASS * reg = i_mcaChip->getRegister( "MC_ADDR_TRANS" ); + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MC_ADDR_TRANS" ); o_rc = reg->Read(); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "Read failed on MC_ADDR_TRANS: i_mcaChip=0x%08x", - i_mcaChip->getHuid() ); + PRDF_ERR( PRDF_FUNC "Read failed on MC_ADDR_TRANS: i_chip=0x%08x", + i_chip->getHuid() ); return o_rc; } @@ -98,8 +98,8 @@ int32_t __getAddrConfig( ExtensibleChip * i_mcaChip, uint8_t i_dslct, // for some reason B2 is valid, there is definitely a bug. if ( reg->IsBitSet(i_dslct ? 
28:12) ) { - PRDF_ERR( PRDF_FUNC "B2 enabled in MC_ADDR_TRANS: i_mcaChip=0x%08x " - "i_dslct=%d", i_mcaChip->getHuid(), i_dslct ); + PRDF_ERR( PRDF_FUNC "B2 enabled in MC_ADDR_TRANS: i_chip=0x%08x " + "i_dslct=%d", i_chip->getHuid(), i_dslct ); return FAIL; } @@ -386,7 +386,7 @@ int32_t __getPortAddr<TYPE_MCA>( ExtensibleChip * i_chip, MemAddr i_addr, // Local vars for address fields uint64_t col = reverseBits(i_addr.getCol(), 7); // C9 C8 C7 C6 C5 C4 C3 uint64_t row = reverseBits(i_addr.getRow(), 18); // R17 R16 R15 .. R1 R0 - uint64_t bnk = i_addr.getBank(); // BG0 BG1 B0 B1 B2 + uint64_t bnk = i_addr.getBank(); // B0 B1 B2 BG0 BG1 uint64_t srnk = i_addr.getRank().getSlave(); // S0 S1 S2 uint64_t mrnk = i_addr.getRank().getRankSlct(); // M0 M1 uint64_t dslct = i_addr.getRank().getDimmSlct(); // D @@ -473,6 +473,266 @@ int32_t __getPortAddr<TYPE_MCA>( ExtensibleChip * i_chip, MemAddr i_addr, return o_rc; } +void __adjustCapiAddrBitPos( uint8_t & io_bitPos ) +{ + // Note: the translation bitmaps are all 5 bits that are defined + // consistently as: + // 00000 = CAPI_Address(5) + // 00001 = CAPI_Address(6) + // 00010 = CAPI_Address(7) + // ... + // 01010 = CAPI_Address(15) + // 01011 = CAPI_Address(31) + // 01100 = CAPI_Address(32) + // ... + // 10011 = CAPI_Address(39) + // So the value from the regs can be converted to the CAPI address bit pos + // by adding 5 if the value is less than or equal to 10, or by adding 20 + // if it is above 10. + + if ( io_bitPos <= 10 ) + { + io_bitPos += 5; + } + else + { + io_bitPos += 20; + } +} + +template <> +int32_t __getPortAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, MemAddr i_addr, + uint64_t & o_addr ) +{ + #define PRDF_FUNC "[MemDealloc::__getPortAddr<TYPE_OCMB_CHIP>] " + + int32_t o_rc = SUCCESS; + + o_addr = 0; + + // Local vars for address fields + uint64_t col = reverseBits(i_addr.getCol(), 7); // C9 C8 C7 C6 C5 C4 C3 + uint64_t row = reverseBits(i_addr.getRow(), 18); // R17 R16 R15 .. 
R1 R0 + uint64_t bnk = i_addr.getBank(); // B0 B1 B2 BG0 BG1 + uint64_t srnk = i_addr.getRank().getSlave(); // S0 S1 S2 + uint64_t mrnk = i_addr.getRank().getRankSlct(); // M0 M1 + uint64_t dslct = i_addr.getRank().getDimmSlct(); // D + + // Determine if a two DIMM config is used. Also, determine how many + // mrank (M0-M1), srnk (S0-S2), or extra row (R17-R15) bits are used. + bool twoDimmConfig; + uint8_t mrnkBits, srnkBits, extraRowBits; + o_rc = __getAddrConfig( i_chip, dslct, twoDimmConfig, mrnkBits, srnkBits, + extraRowBits ); + if ( SUCCESS != o_rc ) return o_rc; + + // Mask off the non-configured bits. If this address came from hardware, + // this would not be a problem. However, the get_mrank_range() and + // get_srank_range() HWPS got lazy just set the entire fields and did not + // take into account the actual bit ranges. + mrnk = __maskBits( mrnk, mrnkBits ); + srnk = __maskBits( srnk, srnkBits ); + row = __maskBits( row, 15 + extraRowBits ); + + // Insert the needed bits based on the config defined in the MC Address + // Translation Registers. + + uint8_t bitPos = 0; + + // Split the row into its components. 
+ uint8_t r17 = (row & 0x20000) >> 17; + uint8_t r16 = (row & 0x10000) >> 16; + uint8_t r15 = (row & 0x08000) >> 15; + uint16_t r14_r0 = (row & 0x07fff); + + // Split the master rank and slave rank into their components + uint8_t m0 = (mrnk & 0x2) >> 1; + uint8_t m1 = (mrnk & 0x1); + + uint8_t s0 = (srnk & 0x4) >> 2; + uint8_t s1 = (srnk & 0x2) >> 1; + uint8_t s2 = (srnk & 0x1); + + // Split the column into its components + uint8_t c9 = (col & 0x40) >> 6; + uint8_t c8 = (col & 0x20) >> 5; + uint8_t c7 = (col & 0x10) >> 4; + uint8_t c6 = (col & 0x08) >> 3; + uint8_t c5 = (col & 0x04) >> 2; + uint8_t c4 = (col & 0x02) >> 1; + uint8_t c3 = (col & 0x01); + + // Split the bank and bank group into their components + // Note: B2 is not used for OCMB + uint8_t b0 = (bnk & 0x10) >> 4; + uint8_t b1 = (bnk & 0x08) >> 3; + + uint8_t bg0 = (bnk & 0x2) >> 1; + uint8_t bg1 = (bnk & 0x1); + + // Row bits 14:0 are always at CAPI addr position 30:16 + o_addr |= (r14_r0 << 16); + + // Check MC_ADDR_TRANS0 register for bit positions + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MC_ADDR_TRANS" ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read failed on MC_ADDR_TRANS: i_chip=0x%08x", + i_chip->getHuid() ); + return o_rc; + } + + // If the DIMM select is valid, insert that bit + if ( twoDimmConfig ) + { + // DIMM bitmap: MC_ADDR_TRANS0[33:37] + bitPos = reg->GetBitFieldJustified( 33, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (dslct << bitPos); + } + + // Insert any of the master rank bits that are valid + switch( mrnkBits ) + { + case 2: + // Master rank 0 bitmap: MC_ADDR_TRANS0[38:42] + bitPos = reg->GetBitFieldJustified( 38, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (m0 << bitPos); + case 1: + // Master rank 1 bitmap: MC_ADDR_TRANS0[43:47] + bitPos = reg->GetBitFieldJustified( 43, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (m1 << bitPos); + break; + } + + // Insert any extra row bits (17:15) that are valid + 
switch ( extraRowBits ) + { + case 3: + // Row 17 bitmap: MC_ADDR_TRANS0[49:53] + bitPos = reg->GetBitFieldJustified( 49, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (r17 << bitPos); + case 2: + // Row 16 bitmap: MC_ADDR_TRANS0[54:58] + bitPos = reg->GetBitFieldJustified( 54, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (r16 << bitPos); + case 1: + // Row 15 bitmap: MC_ADDR_TRANS0[59:63] + bitPos = reg->GetBitFieldJustified( 59, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (r15 << bitPos); + break; + } + + // Check MC_ADDR_TRANS1 register for bit positions + reg = i_chip->getRegister( "MC_ADDR_TRANS1" ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read failed on MC_ADDR_TRANS1: i_chip=0x%08x", + i_chip->getHuid() ); + return o_rc; + } + + // Insert any of the slave rank bits that are valid + switch ( srnkBits ) + { + case 3: + // Slave rank 0 bitmap: MC_ADDR_TRANS1[3:7] + bitPos = reg->GetBitFieldJustified( 3, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (s0 << bitPos); + case 2: + // Slave rank 1 bitmap: MC_ADDR_TRANS1[11:15] + bitPos = reg->GetBitFieldJustified( 11, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (s1 << bitPos); + case 1: + // Slave rank 2 bitmap: MC_ADDR_TRANS1[19:23] + bitPos = reg->GetBitFieldJustified( 19, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (s2 << bitPos); + break; + } + + // Column 3 bitmap: MC_ADDR_TRANS1[30:34] + bitPos = reg->GetBitFieldJustified( 30, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (c3 << bitPos); + + // Column 4 bitmap: MC_ADDR_TRANS1[35:39] + bitPos = reg->GetBitFieldJustified( 35, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (c4 << bitPos); + + // Column 5 bitmap: MC_ADDR_TRANS1[43:47] + bitPos = reg->GetBitFieldJustified( 43, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (c5 << bitPos); + + // Column 6 bitmap: MC_ADDR_TRANS1[51:55] + bitPos = reg->GetBitFieldJustified( 51, 5 ); + __adjustCapiAddrBitPos( bitPos ); 
+ o_addr |= (c6 << bitPos); + + // Column 7 bitmap: MC_ADDR_TRANS1[59:63] + bitPos = reg->GetBitFieldJustified( 59, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (c7 << bitPos); + + // Check MC_ADDR_TRANS2 register for bit positions + reg = i_chip->getRegister( "MC_ADDR_TRANS2" ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read failed on MC_ADDR_TRANS2: i_chip=0x%08x", + i_chip->getHuid() ); + return o_rc; + } + + // Column 8 bitmap: MC_ADDR_TRANS2[3:7] + bitPos = reg->GetBitFieldJustified( 3, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (c8 << bitPos); + + // Column 9 bitmap: MC_ADDR_TRANS2[11:15] + bitPos = reg->GetBitFieldJustified( 11, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (c9 << bitPos); + + // Bank 0 bitmap: MC_ADDR_TRANS2[19:23] + bitPos = reg->GetBitFieldJustified( 19, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (b0 << bitPos ); + + // Bank 1 bitmap: MC_ADDR_TRANS2[27:31] + bitPos = reg->GetBitFieldJustified( 27, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (b1 << bitPos); + + // Bank 2 bitmap: MC_ADDR_TRANS2[35:39] + // Note: Bank2 not used for OCMB + + // Bank group 0 bitmap: MC_ADDR_TRANS2[43:47] + bitPos = reg->GetBitFieldJustified( 43, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (bg0 << bitPos); + + // Bank group 1 bitmap: MC_ADDR_TRANS2[51:55] + bitPos = reg->GetBitFieldJustified( 51, 5 ); + __adjustCapiAddrBitPos( bitPos ); + o_addr |= (bg1 << bitPos); + + return o_rc; + + #undef PRDF_FUNC +} + template <> int32_t __getPortAddr<TYPE_MBA>( ExtensibleChip * i_chip, MemAddr i_addr, uint64_t & o_addr ) @@ -566,12 +826,12 @@ int32_t __getPortAddr<TYPE_MBA>( ExtensibleChip * i_chip, MemAddr i_addr, //------------------------------------------------------------------------------ template<TYPE T> -void __getGrpPrms( ExtensibleChip * i_chip, uint8_t o_portPos, +void __getGrpPrms( ExtensibleChip * i_chip, uint8_t & o_portPos, SCAN_COMM_REGISTER_CLASS * &o_mcfgp, 
SCAN_COMM_REGISTER_CLASS * &o_mcfgpm ); template<> -void __getGrpPrms<TYPE_MCA>( ExtensibleChip * i_chip, uint8_t o_portPos, +void __getGrpPrms<TYPE_MCA>( ExtensibleChip * i_chip, uint8_t & o_portPos, SCAN_COMM_REGISTER_CLASS * &o_mcfgp, SCAN_COMM_REGISTER_CLASS * &o_mcfgpm ) { @@ -585,7 +845,33 @@ void __getGrpPrms<TYPE_MCA>( ExtensibleChip * i_chip, uint8_t o_portPos, } template<> -void __getGrpPrms<TYPE_MBA>( ExtensibleChip * i_chip, uint8_t o_portPos, +void __getGrpPrms<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, uint8_t & o_portPos, + SCAN_COMM_REGISTER_CLASS * &o_mcfgp, + SCAN_COMM_REGISTER_CLASS * &o_mcfgpm ) +{ + // Get the connected parent MI; + ExtensibleChip * mcc = getConnectedParent( i_chip, TYPE_MCC ); + ExtensibleChip * mi = getConnectedParent( mcc, TYPE_MI ); + + // TODO RTC 210072 - support for multiple ports + o_portPos = 0; + + // Get the position of the MCC relative to the MI (0:1) + uint8_t chnlPos = mcc->getPos() % MAX_MCC_PER_MI; + + char mcfgpName[64]; + sprintf( mcfgpName, "MCFGP%d", chnlPos ); + + char mcfgpmName[64]; + sprintf( mcfgpmName, "MCFGPM%d", chnlPos ); + + o_mcfgp = mi->getRegister( mcfgpName ); + o_mcfgpm = mi->getRegister( mcfgpmName ); + +} + +template<> +void __getGrpPrms<TYPE_MBA>( ExtensibleChip * i_chip, uint8_t & o_portPos, SCAN_COMM_REGISTER_CLASS * &o_mcfgp, SCAN_COMM_REGISTER_CLASS * &o_mcfgpm ) { @@ -686,12 +972,67 @@ uint32_t __getGrpInfo( ExtensibleChip * i_chip, uint64_t & o_grpChnls, #undef PRDF_FUNC } +template<> +uint32_t __getGrpInfo<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + uint64_t & o_grpChnls, + uint64_t & o_grpId, uint64_t & o_grpSize, + uint64_t & o_grpBar ) +{ + #define PRDF_FUNC "[MemDealloc::__getGrpInfo] " + + uint32_t o_rc = SUCCESS; + + do + { + // Get portPos and MCFGP/M registers + uint8_t portPos = 0xFF; + SCAN_COMM_REGISTER_CLASS * mcfgp = nullptr; + SCAN_COMM_REGISTER_CLASS * mcfgpm = nullptr; + __getGrpPrms<TYPE_OCMB_CHIP>( i_chip, portPos, mcfgp, mcfgpm ); + + o_rc = mcfgp->Read(); if ( 
SUCCESS != o_rc ) break; + + // Get the number of channels in this group: MCFGP[40:42] + uint8_t mcGrpCnfg = mcfgp->GetBitFieldJustified( 40, 3 ); + switch ( mcGrpCnfg ) + { + case 0: o_grpChnls = 8; break; // 8MCS + case 1: o_grpChnls = 1; break; // 1MCS + case 2: o_grpChnls = 2; break; // 2MCS + case 3: o_grpChnls = 3; break; // 3MCS + case 4: o_grpChnls = 4; break; // 4MCS + case 5: o_grpChnls = 6; break; // 6MCS + default: + PRDF_ERR( PRDF_FUNC "Invalid MC channels per group value: 0x%x " + "on 0x%08x", mcGrpCnfg, i_chip->getHuid() ); + o_rc = FAIL; + } + if ( SUCCESS != o_rc ) break; + + // Get the group ID and group size. + o_grpId = mcfgp->GetBitFieldJustified( 43, 3 ); // MCFGP[43:45] + o_grpSize = mcfgp->GetBitFieldJustified( 25, 15 ); // MCFGP[25:39] + + // TODO RTC 210072 - support for multiple ports, see generic handling + + // Get the base address (BAR). + // Channel 0 is always from the MCFGP. + o_grpBar = mcfgp->GetBitFieldJustified(1, 24); // MCFGP[1:24] + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ -uint32_t __insertGrpId( uint64_t & io_addr, uint64_t i_grpChnls, - uint64_t i_grpId ) +template <TYPE T> +uint32_t __insertGrpId( ExtensibleChip * i_chip, uint64_t & io_addr, + uint64_t i_grpChnls, uint64_t i_grpId ) { - #define PRDF_FUNC "[MemDealloc::__insertGrpId] " + #define PRDF_FUNC "[MemDealloc::__insertGrpId<T>] " uint32_t o_rc = SUCCESS; @@ -742,6 +1083,108 @@ uint32_t __insertGrpId( uint64_t & io_addr, uint64_t i_grpChnls, #undef PRDF_FUNC } +template<> +uint32_t __insertGrpId<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + uint64_t & io_addr, uint64_t i_grpChnls, + uint64_t i_grpId ) +{ + #define PRDF_FUNC "[MemDealloc::__insertGrpId<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + uint64_t upper33 = io_addr & 0xFFFFFFFF80ull; + uint64_t lower7 = io_addr & 0x000000007full; + + bool subChanAEnable = false; + bool subChanBEnable = false; + bool 
bothSubChansEnabled = false; + + ExtensibleChip * mcc = getConnectedParent( i_chip, TYPE_MCC ); + + // Check both subchannels whether we can get the connected OCMB to + // determine whether they are enabled. + // Check for subchannel A + ExtensibleChip * subchanA = getConnectedChild( mcc, TYPE_OCMB_CHIP, 0 ); + if ( nullptr != subchanA ) subChanAEnable = true; + + // Check for subchannel B + ExtensibleChip * subchanB = getConnectedChild( mcc, TYPE_OCMB_CHIP, 1 ); + if ( nullptr != subchanB ) subChanBEnable = true; + + // Check if both subchannels were enabled + if ( subChanAEnable && subChanBEnable ) bothSubChansEnabled = true; + + // If both subchannels are enabled, bit 56 of the address will contain the + // subchannel select bit. + if ( bothSubChansEnabled ) + { + uint8_t ocmbChnl = i_chip->getPos() % MAX_OCMB_PER_MCC; // 0:1 + uint8_t bitInsert = 0; + + switch ( i_grpChnls ) + { + case 1: // insert 1 bit for subchannel select + case 3: + case 6: + bitInsert = ( ocmbChnl & 0x1 ); + io_addr = (upper33 << 1) | (bitInsert << 7) | lower7; + break; + + case 2: // insert 1 bit for subchannel select and 1 bit for grpId + bitInsert = ( ((i_grpId & 0x1) << 1) | (ocmbChnl & 0x1) ); + io_addr = (upper33 << 2) | (bitInsert << 7) | lower7; + break; + + case 4: // insert 1 bit for subchannel select and 2 bits for grpId + bitInsert = ( ((i_grpId & 0x3) << 1) | (ocmbChnl & 0x1) ); + io_addr = (upper33 << 3) | (bitInsert << 7) | lower7; + break; + + case 8: // insert 1 bit for subchannel select and 3 bits for grpId + bitInsert = ( ((i_grpId & 0x7) << 1) | (ocmbChnl & 0x1) ); + io_addr = (upper33 << 4) | (bitInsert << 7) | lower7; + break; + + default: + PRDF_ERR( PRDF_FUNC "Invalid MC channels per group value %d", + i_grpChnls ); + o_rc = FAIL; + } + } + else + { + switch ( i_grpChnls ) + { + case 1: // no shifting + case 3: + case 6: + break; + + case 2: // insert 1 bit + io_addr = (upper33 << 1) | ((i_grpId & 0x1) << 7) | lower7; + break; + + case 4: // insert 2 bits + io_addr 
= (upper33 << 2) | ((i_grpId & 0x3) << 7) | lower7; + break; + + case 8: // insert 3 bits + io_addr = (upper33 << 3) | ((i_grpId & 0x7) << 7) | lower7; + break; + + default: + PRDF_ERR( PRDF_FUNC "Invalid MC channels per group value %d", + i_grpChnls ); + o_rc = FAIL; + } + } + + return o_rc; + + #undef PRDF_FUNC + +} + //------------------------------------------------------------------------------ // The hardware uses a mod3 hashing algorithm to calculate which memory channel @@ -849,7 +1292,7 @@ void __addBar( uint64_t & io_addr, uint64_t i_grpBar ) template<TYPE T> uint32_t getSystemAddr( ExtensibleChip * i_chip, MemAddr i_addr, - uint64_t & o_addr ) + uint64_t & o_addr ) { #define PRDF_FUNC "[MemDealloc::getSystemAddr] " @@ -867,7 +1310,7 @@ uint32_t getSystemAddr( ExtensibleChip * i_chip, MemAddr i_addr, if ( SUCCESS != o_rc ) break; // Insert the group ID. - o_rc = __insertGrpId( o_addr, grpChnls, grpId ); + o_rc = __insertGrpId<T>( i_chip, o_addr, grpChnls, grpId ); if ( SUCCESS != o_rc ) break; // Notes on 3 and 6 channel per group configs: @@ -915,8 +1358,8 @@ uint32_t getSystemAddrRange( ExtensibleChip * i_chip, if ( SUCCESS != o_rc ) break; // Insert the group ID. 
- o_rc = __insertGrpId( o_saddr, grpChnls, grpId ); - o_rc |= __insertGrpId( o_eaddr, grpChnls, grpId ); + o_rc = __insertGrpId<T>( i_chip, o_saddr, grpChnls, grpId ); + o_rc |= __insertGrpId<T>( i_chip, o_eaddr, grpChnls, grpId ); if ( SUCCESS != o_rc ) break; // Notes on 3 and 6 channel per group configs: @@ -975,6 +1418,7 @@ int32_t page( ExtensibleChip * i_chip, MemAddr i_addr ) } template int32_t page<TYPE_MCA>( ExtensibleChip * i_chip, MemAddr i_addr ); template int32_t page<TYPE_MBA>( ExtensibleChip * i_chip, MemAddr i_addr ); +template int32_t page<TYPE_OCMB_CHIP>(ExtensibleChip * i_chip, MemAddr i_addr); //------------------------------------------------------------------------------ @@ -1025,6 +1469,7 @@ int32_t rank( ExtensibleChip * i_chip, MemRank i_rank ) } template int32_t rank<TYPE_MCA>( ExtensibleChip * i_chip, MemRank i_rank ); template int32_t rank<TYPE_MBA>( ExtensibleChip * i_chip, MemRank i_rank ); +template int32_t rank<TYPE_OCMB_CHIP>(ExtensibleChip * i_chip, MemRank i_rank); //------------------------------------------------------------------------------ @@ -1074,6 +1519,7 @@ int32_t port( ExtensibleChip * i_chip ) } template int32_t port<TYPE_MCA>( ExtensibleChip * i_chip ); template int32_t port<TYPE_MBA>( ExtensibleChip * i_chip ); +template int32_t port<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ); //------------------------------------------------------------------------------ @@ -1236,6 +1682,22 @@ int32_t dimmList( TargetHandleList & i_dimmList ) sendPredDeallocRequest( ssAddr, seAddr ); PRDF_TRAC( PRDF_FUNC "Predictive dealloc for start addr: 0x%016llx " "end addr: 0x%016llx", ssAddr, seAddr ); + + #ifdef CONFIG_NVDIMM + // If the DIMM is an NVDIMM, send a message to PHYP that a save/restore + // may work. 
+ if ( isNVDIMM(*it) ) + { + uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( *it, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) + { + PRDF_TRAC( PRDF_FUNC "nvdimmNotifyProtChange(0x%08x) " + "failed.", getHuid(*it) ); + continue; + } + } + #endif } return o_rc; @@ -1278,6 +1740,14 @@ int32_t dimmList( TargetHandleList & i_dimmList ) break; } + // Third, check for OCMBs. + list = getConnected( dimmTrgt, TYPE_OCMB_CHIP ); + if ( !list.empty() ) + { + o_rc = dimmList<TYPE_OCMB_CHIP>( i_dimmList ); + break; + } + // If we get here we did not find a supported target. PRDF_ERR( PRDF_FUNC "Unsupported connected parent to dimm 0x%08x", getHuid(dimmTrgt) ); diff --git a/src/usr/diag/prdf/plat/mem/prdfMemIplCeStats.C b/src/usr/diag/prdf/plat/mem/prdfMemIplCeStats.C index 869aa92e8..b257d0874 100755 --- a/src/usr/diag/prdf/plat/mem/prdfMemIplCeStats.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemIplCeStats.C @@ -83,8 +83,8 @@ void MemIplCeStats<TYPE_MCA>::banAnalysis( uint8_t i_dimmSlct, //------------------------------------------------------------------------------ template<> -void MemIplCeStats<TYPE_MEM_PORT>::banAnalysis( uint8_t i_dimmSlct, - uint8_t i_portSlct ) +void MemIplCeStats<TYPE_OCMB_CHIP>::banAnalysis( uint8_t i_dimmSlct, + uint8_t i_portSlct ) { PRDF_ASSERT( i_dimmSlct < MAX_DIMM_PER_PORT ); PRDF_ASSERT( 0 == i_portSlct ); @@ -117,9 +117,9 @@ void MemIplCeStats<TYPE_MCA>::banAnalysis( uint8_t i_dimmSlct ) //------------------------------------------------------------------------------ template<> -void MemIplCeStats<TYPE_MEM_PORT>::banAnalysis( uint8_t i_dimmSlct ) +void MemIplCeStats<TYPE_OCMB_CHIP>::banAnalysis( uint8_t i_dimmSlct ) { - // Only one DIMM per DIMM select on MEM_PORT. + // Only one DIMM per DIMM select on OCMB_CHIP. 
banAnalysis( i_dimmSlct, 0 ); } @@ -481,6 +481,6 @@ void MemIplCeStats<T>::addMruAndCommitErrl( const MemoryMru & i_memmru, // need these templates to avoid linker errors template class MemIplCeStats<TYPE_MCA>; template class MemIplCeStats<TYPE_MBA>; -template class MemIplCeStats<TYPE_MEM_PORT>; +template class MemIplCeStats<TYPE_OCMB_CHIP>; } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C index 5351b842a..bececfa21 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2019 */ +/* Contributors Listed Below - COPYRIGHT 2016,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -106,17 +106,6 @@ uint32_t clearCmdCompleteAttn<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) } template<> -uint32_t clearCmdCompleteAttn<TYPE_MEM_PORT>( ExtensibleChip * i_chip ) -{ - PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); - - ExtensibleChip * ocmbChip = getConnectedParent( i_chip, TYPE_OCMB_CHIP ); - - return clearCmdCompleteAttn<TYPE_OCMB_CHIP>( ocmbChip ); -} - -template<> uint32_t clearCmdCompleteAttn<TYPE_MBA>( ExtensibleChip * i_chip ) { // Clear MBASPA[0,8]. 
@@ -194,17 +183,6 @@ uint32_t clearEccCounters<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) } template<> -uint32_t clearEccCounters<TYPE_MEM_PORT>( ExtensibleChip * i_chip ) -{ - PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); - - ExtensibleChip * ocmbChip = getConnectedParent( i_chip, TYPE_OCMB_CHIP ); - - return clearEccCounters<TYPE_OCMB_CHIP>( ocmbChip ); -} - -template<> uint32_t clearEccCounters<TYPE_MBA>( ExtensibleChip * i_chip ) { PRDF_ASSERT( nullptr != i_chip ); @@ -306,17 +284,6 @@ uint32_t clearEccFirs<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) } template<> -uint32_t clearEccFirs<TYPE_MEM_PORT>( ExtensibleChip * i_chip ) -{ - PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); - - ExtensibleChip * ocmbChip = getConnectedParent( i_chip, TYPE_OCMB_CHIP ); - - return clearEccFirs<TYPE_OCMB_CHIP>( ocmbChip ); -} - -template<> uint32_t clearEccFirs<TYPE_MBA>( ExtensibleChip * i_chip ) { uint32_t o_rc = SUCCESS; @@ -409,22 +376,20 @@ uint32_t checkEccFirs<TYPE_MCA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint32_t checkEccFirs<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - uint32_t & o_eccAttns ) +uint32_t checkEccFirs<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + uint32_t & o_eccAttns ) { - #define PRDF_FUNC "[checkEccFirs<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[checkEccFirs<TYPE_OCMB_CHIP>] " uint32_t o_rc = SUCCESS; o_eccAttns = MAINT_NO_ERROR; PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); - - ExtensibleChip * ocmbChip = getConnectedParent( i_chip, TYPE_OCMB_CHIP ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); - SCAN_COMM_REGISTER_CLASS * rdffir = ocmbChip->getRegister( "RDFFIR" ); - SCAN_COMM_REGISTER_CLASS * mcbistfir = ocmbChip->getRegister( "MCBISTFIR" ); + SCAN_COMM_REGISTER_CLASS * rdffir = i_chip->getRegister( "RDFFIR" ); + SCAN_COMM_REGISTER_CLASS * 
mcbistfir = i_chip->getRegister( "MCBISTFIR" ); do { @@ -453,7 +418,7 @@ uint32_t checkEccFirs<TYPE_MEM_PORT>( ExtensibleChip * i_chip, if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on MCBISTFIR: mcbChip=0x%08x", - ocmbChip->getHuid() ); + i_chip->getHuid() ); break; } @@ -733,11 +698,11 @@ uint32_t setBgScrubThresholds<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ -template<> -uint32_t didCmdStopOnLastAddr<TYPE_MBA>( ExtensibleChip * i_chip, - AddrRangeType i_rangeType, - bool & o_stoppedOnLastAddr, - bool i_rowRepair ) +template<TARGETING::TYPE T> +uint32_t didCmdStopOnLastAddr( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + bool & o_stoppedOnLastAddr, + bool i_rowRepair ) { #define PRDF_FUNC "[didCmdStopOnLastAddr] " @@ -749,7 +714,7 @@ uint32_t didCmdStopOnLastAddr<TYPE_MBA>( ExtensibleChip * i_chip, { // Get the current address. MemAddr curAddr; - o_rc = getMemMaintAddr<TYPE_MBA>( i_chip, curAddr ); + o_rc = getMemMaintAddr<T>( i_chip, curAddr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -759,7 +724,7 @@ uint32_t didCmdStopOnLastAddr<TYPE_MBA>( ExtensibleChip * i_chip, // Get the end address of the current rank. 
MemAddr junk, endAddr; - o_rc = getMemAddrRange<TYPE_MBA>( i_chip, curAddr.getRank(), junk, + o_rc = getMemAddrRange<T>( i_chip, curAddr.getRank(), junk, endAddr, i_rangeType ); if ( SUCCESS != o_rc ) { @@ -784,7 +749,16 @@ uint32_t didCmdStopOnLastAddr<TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } - +template +uint32_t didCmdStopOnLastAddr<TYPE_MBA>( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + bool & o_stoppedOnLastAddr, + bool i_rowRepair ); +template +uint32_t didCmdStopOnLastAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + bool & o_stoppedOnLastAddr, + bool i_rowRepair ); //------------------------------------------------------------------------------ } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C index f86110458..5d310c51b 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -248,8 +248,8 @@ uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip, do { // Get all ports in which the command was run. 
- std::vector<ExtensibleChip *> portList; - o_rc = getMcbistMaintPort( i_chip, portList ); + ExtensibleChipList portList; + o_rc = getMcbistMaintPort<TYPE_MCBIST>( i_chip, portList ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMcbistMaintPort(0x%08x) failed", @@ -291,6 +291,43 @@ uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip, } template<> +uint32_t __analyzeCmdComplete<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + TdRankListEntry & o_stoppedRank, + const MemAddr & i_addr, + bool & o_errorsFound, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[__analyzeCmdComplete] " + + uint32_t o_rc = SUCCESS; + + o_errorsFound = false; + + do + { + // Update iv_stoppedRank. + o_stoppedRank = __getStopRank<TYPE_OCMB_CHIP>( i_chip, i_addr ); + + // Check the OCMB for ECC errors. + bool errorsFound; + o_rc = __checkEcc<TYPE_OCMB_CHIP>( i_chip, i_addr, errorsFound, io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__checkEcc<TYPE_OCMB_CHIP>(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + if ( errorsFound ) o_errorsFound = true; + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> uint32_t __analyzeCmdComplete<TYPE_MBA>( ExtensibleChip * i_chip, TdRankListEntry & o_stoppedRank, const MemAddr & i_addr, @@ -346,7 +383,7 @@ uint32_t MemTdCtlr<T>::analyzeCmdComplete( bool & o_errorsFound, // of in defaultStep() because a TD procedure could have been run // before defaultStep() and it is possible that canResumeBgScrub() // could give as a false positive in that case. - o_rc = canResumeBgScrub( iv_resumeBgScrub ); + o_rc = canResumeBgScrub( iv_resumeBgScrub, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "canResumeBgScrub(0x%08x) failed", @@ -397,9 +434,15 @@ void MemTdCtlr<T>::collectStateCaptureData( STEP_CODE_DATA_STRUCT & io_sc, // Get the version to use. 
uint8_t version = TD_CTLR_DATA::VERSION_1; + bool isNimbus = false; if ( MODEL_NIMBUS == getChipModel(getMasterProc()) ) { version = TD_CTLR_DATA::VERSION_2; + isNimbus = true; + } + else if ( MODEL_AXONE == getChipModel(getMasterProc()) ) + { + version = TD_CTLR_DATA::VERSION_2; } // Get the IPL state. @@ -443,6 +486,11 @@ void MemTdCtlr<T>::collectStateCaptureData( STEP_CODE_DATA_STRUCT & io_sc, if ( TD_CTLR_DATA::VERSION_2 == version ) { curPort = iv_curProcedure->getChip()->getPos() % MAX_MCA_PER_MCBIST; + if ( !isNimbus ) + { + TargetHandle_t portTrgt = iv_curProcedure->getChip()->getTrgt(); + curPort = portTrgt->getAttr<ATTR_REL_POS>(); + } } } @@ -475,6 +523,11 @@ void MemTdCtlr<T>::collectStateCaptureData( STEP_CODE_DATA_STRUCT & io_sc, if ( TD_CTLR_DATA::VERSION_2 == version ) { itPort = queue[n]->getChip()->getPos() % MAX_MCA_PER_MCBIST; + if ( !isNimbus ) + { + TargetHandle_t portTrgt = queue[n]->getChip()->getTrgt(); + itPort = portTrgt->getAttr<ATTR_REL_POS>(); + } } bsb.setFieldJustify( pos, 3, itMrnk ); pos+=3; @@ -502,6 +555,7 @@ void MemTdCtlr<T>::collectStateCaptureData( STEP_CODE_DATA_STRUCT & io_sc, // Avoid linker errors with the template. template class MemTdCtlr<TYPE_MCBIST>; template class MemTdCtlr<TYPE_MBA>; +template class MemTdCtlr<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H index 332109b48..da969e2c1 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -54,14 +54,14 @@ class MemTdCtlr /** * @brief Constructor * - * This contructor will only be called in the MCBIST or MBA data bundle, - * which already checks for a valid type. + * This contructor will only be called in the MCBIST, MBA, or OCMB data + * bundle, which already checks for a valid type. * * Need to initialize iv_stoppedRank to a valid entry in iv_rankList. Use * the last entry in the list so that the 'next' rank is the first entry * in the list. * - * @param i_chip An MCBIST or MBA chip. + * @param i_chip An MCBIST, MBA, or OCMB chip. */ explicit MemTdCtlr( ExtensibleChip * i_chip ) : iv_chip( i_chip ), iv_rankList( i_chip ), @@ -122,7 +122,7 @@ class MemTdCtlr /** * @brief Bans TPS on the given rank. Any attempts to add a TPS procedure * to the queue for this rank will be ignored. - * @param i_chip MCA or MBA chip. + * @param i_chip MCA, MBA, or OCMB chip. * @param i_rank The target slave rank. */ void banTps( ExtensibleChip * i_chip, const MemRank & i_rank ) @@ -294,15 +294,17 @@ class MemTdCtlr /** * @param o_canResume True, if background scrubbing can be resumed. False, * if a new background scrub command must be started. + * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ - uint32_t canResumeBgScrub( bool & o_canResume ); + uint32_t canResumeBgScrub( bool & o_canResume, + STEP_CODE_DATA_STRUCT & io_sc ); #endif private: // instance variables - /** An MCBIST or MBA chip associated with this TD controller. */ + /** An MCBIST, MBA, or OCMB chip associated with this TD controller. */ ExtensibleChip * const iv_chip; /** The TD queue that contains all of the pending TD procedures. 
*/ diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C index ea04d2964..401a48042 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -160,6 +160,14 @@ bool __mnfgCeCheck<TYPE_MCA>( uint32_t i_eccAttns ) } template<> inline +bool __mnfgCeCheck<TYPE_OCMB_CHIP>( uint32_t i_eccAttns ) +{ + return ( ( 0 != (i_eccAttns & MAINT_HARD_NCE_ETE) ) && + ( (0 != (i_eccAttns & MAINT_NCE)) || + (0 != (i_eccAttns & MAINT_TCE)) ) ); +} + +template<> inline bool __mnfgCeCheck<TYPE_MBA>( uint32_t i_eccAttns ) { return ( 0 != (i_eccAttns & MAINT_HARD_NCE_ETE) ); @@ -251,12 +259,18 @@ template uint32_t __checkEcc<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, bool & o_errorsFound, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t __checkEcc<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + bool & o_errorsFound, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ // Avoid linker errors with the template. 
template class MemTdCtlr<TYPE_MCBIST>; template class MemTdCtlr<TYPE_MBA>; +template class MemTdCtlr<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C index d52ef2d1d..5565e217f 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C @@ -107,6 +107,36 @@ void __recaptureRegs<TYPE_MCBIST>( STEP_CODE_DATA_STRUCT & io_sc, } template<> +void __recaptureRegs<TYPE_OCMB_CHIP>( STEP_CODE_DATA_STRUCT & io_sc, + ExtensibleChip * i_chip ) +{ + #define PRDF_FUNC "[__recaptureRegs<TYPE_OCMB_CHIP>] " + + RegDataCache & cache = RegDataCache::getCachedRegisters(); + CaptureData & cd = io_sc.service_data->GetCaptureData(); + + // refresh and recapture the ocmb registers + const char * ocmbRegs[] = + { + "MCBISTFIR", "RDFFIR", "MBSEC0", "MBSEC1", "OCMB_MBSSYMEC0", + "OCMB_MBSSYMEC1", "OCMB_MBSSYMEC2", "OCMB_MBSSYMEC3", + "OCMB_MBSSYMEC4", "OCMB_MBSSYMEC5", "OCMB_MBSSYMEC6", + "OCMB_MBSSYMEC7", "OCMB_MBSSYMEC8", "MBSMSEC", "MCBMCAT", + }; + + for ( uint32_t i = 0; i < sizeof(ocmbRegs)/sizeof(char*); i++ ) + { + SCAN_COMM_REGISTER_CLASS * reg = + i_chip->getRegister( ocmbRegs[i] ); + cache.flush( i_chip, reg ); + } + + i_chip->CaptureErrorData( cd, Util::hashString("MaintCmdRegs_ocmb") ); + + #undef PRDF_FUNC +} + +template<> void __recaptureRegs<TYPE_MBA>( STEP_CODE_DATA_STRUCT & io_sc, ExtensibleChip * i_chip ) { @@ -283,7 +313,7 @@ uint32_t MemTdCtlr<T>::defaultStep( STEP_CODE_DATA_STRUCT & io_sc ) PRDF_TRAC( PRDF_FUNC "Calling resumeBgScrub<T>(0x%08x)", iv_chip->getHuid() ); - o_rc = resumeBgScrub<T>( iv_chip ); + o_rc = resumeBgScrub<T>( iv_chip, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "resumeBgScrub<T>(0x%08x) failed", @@ -358,9 +388,48 @@ uint32_t __handleNceEte( ExtensibleChip * i_chip, uint32_t count = symData.size(); switch ( T ) { - case TYPE_MCA: 
PRDF_ASSERT( 1 <= count && count <= 2 ); break; - case TYPE_MBA: PRDF_ASSERT( 1 == count ); break; - default: PRDF_ASSERT( false ); + case TYPE_MCA: + { + PRDF_ASSERT( 1 <= count && count <= 2 ); + // Increment the CE counter and store the rank we're on, + // reset the UE and CE counts if we have stopped on a new rank. + ExtensibleChip * mcb = getConnectedParent(i_chip, TYPE_MCBIST); + McbistDataBundle * mcbdb = getMcbistDataBundle(mcb); + if ( mcbdb->iv_ceUeRank != i_addr.getRank() ) + { + mcbdb->iv_ceStopCounter.reset(); + mcbdb->iv_ueStopCounter.reset(); + } + mcbdb->iv_ceStopCounter.inc( io_sc ); + mcbdb->iv_ceUeRank = i_addr.getRank(); + + break; + } + case TYPE_MBA: + { + PRDF_ASSERT( 1 == count ); + break; + } + case TYPE_OCMB_CHIP: + { + PRDF_ASSERT( 1 <= count && count <= 2 ); + // Increment the UE counter and store the rank we're on, + // reset the UE and CE counts if we have stopped on a new rank. + OcmbDataBundle * ocmbdb = getOcmbDataBundle(i_chip); + if ( ocmbdb->iv_ceUeRank != i_addr.getRank() ) + { + ocmbdb->iv_ceStopCounter.reset(); + ocmbdb->iv_ueStopCounter.reset(); + } + ocmbdb->iv_ceStopCounter.inc( io_sc ); + ocmbdb->iv_ceUeRank = i_addr.getRank(); + + break; + } + default: + { + PRDF_ASSERT( false ); + } } for ( auto & d : symData ) @@ -408,6 +477,14 @@ uint32_t __handleSoftInterCeEte<TYPE_MCA>( ExtensibleChip * i_chip, } template<> +uint32_t __handleSoftInterCeEte<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + return __handleNceEte<TYPE_OCMB_CHIP>( i_chip, i_addr, io_sc ); +} + +template<> uint32_t __handleSoftInterCeEte<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, STEP_CODE_DATA_STRUCT & io_sc ) @@ -480,6 +557,52 @@ uint32_t __handleRceEte<TYPE_MCA>( ExtensibleChip * i_chip, } template<> +uint32_t __handleRceEte<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + bool & o_errorsFound, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC 
"[__handleRceEte] " + + uint32_t o_rc = SUCCESS; + + // Should only get this attention in MNFG mode. + PRDF_ASSERT( mfgMode() ); + + do + { + // The RCE ETE attention could be from IUE, IMPE, or IRCD. Need to check + // RDFFIR[37] to determine if there was at least one IUE. + SCAN_COMM_REGISTER_CLASS * fir = i_chip->getRegister( "RDFFIR" ); + o_rc = fir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on RDFFIR: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + if ( !fir->IsBitSet(37) ) break; // nothing else to do + + // Handle the IUE. + o_errorsFound = true; + io_sc.service_data->AddSignatureList( i_chip->getTrgt(), + PRDFSIG_MaintIUE ); + o_rc = MemEcc::handleMemIue<TYPE_OCMB_CHIP>( i_chip, i_rank, io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "analyzeMaintIue(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> uint32_t __handleRceEte<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, bool & o_errorsFound, STEP_CODE_DATA_STRUCT & io_sc ) @@ -698,6 +821,11 @@ template uint32_t __checkEcc<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, bool & o_errorsFound, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t __checkEcc<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + bool & o_errorsFound, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -786,6 +914,76 @@ uint32_t MemTdCtlr<TYPE_MCBIST>::unmaskEccAttns() //------------------------------------------------------------------------------ template<> +uint32_t MemTdCtlr<TYPE_OCMB_CHIP>::maskEccAttns() +{ + #define PRDF_FUNC "[MemTdCtlr<TYPE_OCMB_CHIP>::maskEccAttns] " + + uint32_t o_rc = SUCCESS; + + SCAN_COMM_REGISTER_CLASS * mask = iv_chip->getRegister( "RDFFIR_MASK_OR" ); + + mask->clearAllBits(); + mask->SetBit(8); // Mainline read NCE + mask->SetBit(9); // Mainline read TCE + + o_rc = 
mask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_MASK_OR" ); + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t MemTdCtlr<TYPE_OCMB_CHIP>::unmaskEccAttns() +{ + #define PRDF_FUNC "[MemTdCtlr<TYPE_OCMB_CHIP>::unmaskEccAttns] " + + uint32_t o_rc = SUCCESS; + + // Memory CEs were masked at the beginning of the TD procedure, so + // clear and unmask them. Also, it is possible that memory UEs have + // thresholded so clear and unmask them as well. + + SCAN_COMM_REGISTER_CLASS * fir = iv_chip->getRegister( "RDFFIR_AND" ); + SCAN_COMM_REGISTER_CLASS * mask = iv_chip->getRegister( "RDFFIR_MASK_AND" ); + + fir->setAllBits(); mask->setAllBits(); + + // Do not unmask NCE and TCE attentions if they have been permanently + // masked due to certain TPS conditions. + if ( !(getOcmbDataBundle(iv_chip)->iv_maskMainlineNceTce) ) + { + fir->ClearBit(8); mask->ClearBit(8); // Mainline read NCE + fir->ClearBit(9); mask->ClearBit(9); // Mainline read TCE + } + fir->ClearBit(14); mask->ClearBit(14); // Mainline read UE + + o_rc = fir->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_AND" ); + } + + o_rc = mask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_MASK_AND" ); + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> uint32_t MemTdCtlr<TYPE_MBA>::maskEccAttns() { #define PRDF_FUNC "[MemTdCtlr<TYPE_MBA>::maskEccAttns] " @@ -887,6 +1085,13 @@ SCAN_COMM_REGISTER_CLASS * __getEccFirAnd<TYPE_MCA>( ExtensibleChip * i_chip ) } template<> +SCAN_COMM_REGISTER_CLASS * __getEccFirAnd<TYPE_OCMB_CHIP>( + ExtensibleChip * i_chip ) +{ + return i_chip->getRegister( "RDFFIR_AND" ); +} + +template<> SCAN_COMM_REGISTER_CLASS * __getEccFirAnd<TYPE_MBA>( ExtensibleChip * i_chip ) { ExtensibleChip * 
membChip = getConnectedParent( i_chip, TYPE_MEMBUF ); @@ -1009,6 +1214,45 @@ uint32_t MemTdCtlr<TYPE_MCBIST>::initialize() } template<> +uint32_t MemTdCtlr<TYPE_OCMB_CHIP>::initialize() +{ + #define PRDF_FUNC "[MemTdCtlr<TYPE_OCMB_CHIP>::initialize] " + + uint32_t o_rc = SUCCESS; + + do + { + if ( iv_initialized ) break; // nothing to do + + // Unmask the fetch attentions just in case there were masked during a + // TD procedure prior to a reset/reload. + o_rc = unmaskEccAttns(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "unmaskEccAttns() failed" ); + break; + } + + // Find all unverified chip marks. + o_rc = __findChipMarks<TYPE_OCMB_CHIP>( iv_rankList ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__findChipMarks() failed on 0x%08x", + iv_chip->getHuid() ); + break; + } + + // At this point, the TD controller is initialized. + iv_initialized = true; + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> uint32_t MemTdCtlr<TYPE_MBA>::initialize() { #define PRDF_FUNC "[MemTdCtlr<TYPE_MBA>::initialize] " @@ -1162,6 +1406,118 @@ uint32_t MemTdCtlr<TYPE_MCBIST>::handleRrFo() //------------------------------------------------------------------------------ template<> +uint32_t MemTdCtlr<TYPE_OCMB_CHIP>::handleRrFo() +{ + #define PRDF_FUNC "[MemTdCtlr<TYPE_OCMB_CHIP>::handleRrFo] " + + uint32_t o_rc = SUCCESS; + + do + { + // Check if maintenance command complete attention is set. + SCAN_COMM_REGISTER_CLASS * mcbistfir = + iv_chip->getRegister("MCBISTFIR"); + o_rc = mcbistfir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MCBISTFIR"); + break; + } + + // If there is a command complete attention, nothing to do, break out. + if ( mcbistfir->IsBitSet(10) ) + break; + + + // Check if a command is not running. + // If bit 0 of MCB_CNTLSTAT is on, a mcbist run is in progress. 
+ SCAN_COMM_REGISTER_CLASS * mcb_cntlstat = + iv_chip->getRegister("MCB_CNTLSTAT"); + o_rc = mcb_cntlstat->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MCB_CNTLSTAT" ); + break; + } + + // If a command is not running, set command complete attn, break. + if ( !mcb_cntlstat->IsBitSet(0) ) + { + SCAN_COMM_REGISTER_CLASS * mcbistfir_or = + iv_chip->getRegister("MCBISTFIR_OR"); + mcbistfir_or->SetBit( 10 ); + + mcbistfir_or->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on MCBISTFIR_OR" ); + } + break; + } + + // Check if there are unverified chip marks. + std::vector<TdRankListEntry> vectorList = iv_rankList.getList(); + + for ( auto & entry : vectorList ) + { + ExtensibleChip * ocmbChip = entry.getChip(); + MemRank rank = entry.getRank(); + + // Get the chip mark + MemMark chipMark; + o_rc = MarkStore::readChipMark<TYPE_OCMB_CHIP>( ocmbChip, rank, + chipMark ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_OCMB_CHIP>(0x%08x,%d) " + "failed", ocmbChip->getHuid(), rank.getMaster() ); + break; + } + + if ( !chipMark.isValid() ) continue; // no chip mark present + + // Get the DQ Bitmap data. + MemDqBitmap dqBitmap; + + o_rc = getBadDqBitmap( ocmbChip->getTrgt(), rank, dqBitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, %d)", + ocmbChip->getHuid(), rank.getMaster() ); + break; + } + + // Check if the chip mark is verified or not. + bool cmVerified = false; + o_rc = dqBitmap.isChipMark( chipMark.getSymbol(), cmVerified ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "dqBitmap.isChipMark failed." ); + break; + } + + // If there are any unverified chip marks, stop the command, break. 
+ if ( !cmVerified ) + { + o_rc = stopBgScrub<TYPE_OCMB_CHIP>( iv_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "stopBgScrub<TYPE_OCMB_CHIP>(0x%08x) " + "failed", iv_chip->getHuid() ); + } + break; + } + } + + } while (0); + + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> uint32_t MemTdCtlr<TYPE_MBA>::handleRrFo() { #define PRDF_FUNC "[MemTdCtlr<TYPE_MBA>::handleRrFo] " @@ -1289,7 +1645,8 @@ uint32_t MemTdCtlr<TYPE_MBA>::handleRrFo() //------------------------------------------------------------------------------ template<> -uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume ) +uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume, + STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub] " @@ -1305,21 +1662,124 @@ uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume ) // can use the stop conditions, which should be unique for background scrub, // to determine if it has been configured. - SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" ); - o_rc = reg->Read(); - if ( SUCCESS != o_rc ) + do { - PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x", - iv_chip->getHuid() ); - } - else if ( 0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH - 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH - 0xf != reg->GetBitFieldJustified(8,4) && // NCE hard TH - reg->IsBitSet(34) && // pause on MPE - reg->IsBitSet(35) ) // pause on UE + SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x", + iv_chip->getHuid() ); + break; + } + // Note: The stop conditions for background scrubbing can now be + // variable depending on whether we have hit threshold for the number + // of UEs or CEs that we have stopped on on a rank. 
+ + // If we haven't hit CE or UE threshold, check the CE stop conditions + if ( !getMcbistDataBundle(iv_chip)->iv_ceStopCounter.thReached(io_sc) && + !getMcbistDataBundle(iv_chip)->iv_ueStopCounter.thReached(io_sc) ) + { + // If the stop conditions aren't set, just break out. + if ( !(0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH + 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH + 0xf != reg->GetBitFieldJustified(8,4)) ) // NCE hard TH + { + break; + } + + } + + // If we haven't hit UE threshold yet, check the UE stop condition + if ( !getMcbistDataBundle(iv_chip)->iv_ueStopCounter.thReached(io_sc) ) + { + // If the stop condition isn't set, just break out + if ( !reg->IsBitSet(35) ) // pause on UE + { + break; + } + } + + // Need to check the stop on mpe stop condition regardless of whether + // we hit the UE or CE threshold. + if ( reg->IsBitSet(34) ) // pause on MPE + { + // If we reach here, all the stop conditions are set for background + // scrub, so we can resume. + o_canResume = true; + } + }while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> +uint32_t MemTdCtlr<TYPE_OCMB_CHIP>::canResumeBgScrub( bool & o_canResume, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemTdCtlr<TYPE_OCMB_CHIP>::canResumeBgScrub] " + + uint32_t o_rc = SUCCESS; + + o_canResume = false; + + // It is possible that we were running a TD procedure and the PRD service + // was reset. Therefore, we must check if background scrubbing was actually + // configured. There really is not a good way of doing this. A scrub command + // is a scrub command the only difference is the speed. Unfortunately, that + // speed can change depending on how the hardware team tunes it. For now, we + // can use the stop conditions, which should be unique for background scrub, + // to determine if it has been configured. 
+ + do { - o_canResume = true; - } + SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x", + iv_chip->getHuid() ); + break; + } + // Note: The stop conditions for background scrubbing can now be + // variable depending on whether we have hit threshold for the number + // of UEs or CEs that we have stopped on on a rank. + + // If we haven't hit CE or UE threshold, check the CE stop conditions + if ( !getOcmbDataBundle(iv_chip)->iv_ceStopCounter.thReached(io_sc) && + !getOcmbDataBundle(iv_chip)->iv_ueStopCounter.thReached(io_sc) ) + { + // If the stop conditions aren't set, just break out. + if ( !(0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH + 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH + 0xf != reg->GetBitFieldJustified(8,4)) ) // NCE hard TH + { + break; + } + + } + + // If we haven't hit UE threshold yet, check the UE stop condition + if ( !getOcmbDataBundle(iv_chip)->iv_ueStopCounter.thReached(io_sc) ) + { + // If the stop condition isn't set, just break out + if ( !reg->IsBitSet(35) ) // pause on UE + { + break; + } + } + + // Need to check the stop on mpe stop condition regardless of whether + // we hit the UE or CE threshold. + if ( reg->IsBitSet(34) ) // pause on MPE + { + // If we reach here, all the stop conditions are set for background + // scrub, so we can resume. + o_canResume = true; + } + }while(0); return o_rc; @@ -1327,7 +1787,8 @@ uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume ) } template<> -uint32_t MemTdCtlr<TYPE_MBA>::canResumeBgScrub( bool & o_canResume ) +uint32_t MemTdCtlr<TYPE_MBA>::canResumeBgScrub( bool & o_canResume, + STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemTdCtlr<TYPE_MBA>::canResumeBgScrub] " @@ -1365,6 +1826,7 @@ uint32_t MemTdCtlr<TYPE_MBA>::canResumeBgScrub( bool & o_canResume ) // Avoid linker errors with the template. 
template class MemTdCtlr<TYPE_MCBIST>; template class MemTdCtlr<TYPE_MBA>; +template class MemTdCtlr<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdRankList.H b/src/usr/diag/prdf/plat/mem/prdfMemTdRankList.H index e61389ea2..2e833a12a 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdRankList.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdRankList.H @@ -80,8 +80,8 @@ class TdRankListEntry private: - ExtensibleChip * iv_chip = nullptr; ///< MCA, MBA, or MEM_PORT chip. - MemRank iv_rank = MemRank(0); ///< Any rank on the MCA/MBA/MEM_PORT + ExtensibleChip * iv_chip = nullptr; ///< MCA, MBA, or OCMB chip. + MemRank iv_rank = MemRank(0); ///< Any rank on the MCA/MBA/OCMB }; /** @@ -95,7 +95,7 @@ class TdRankList /** * @brief Constructor. - * @param MCBIST or MBA chip. + * @param MCBIST, OCMB, or MBA chip. */ explicit TdRankList( ExtensibleChip * i_chip ); @@ -191,17 +191,13 @@ inline TdRankList<TARGETING::TYPE_OCMB_CHIP>::TdRankList( PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); - ExtensibleChipList memPortChipList = getConnected( i_chip, TYPE_MEM_PORT ); - for ( auto & memPortChip : memPortChipList ) - { - std::vector<MemRank> rankList; - getSlaveRanks<TYPE_MEM_PORT>( memPortChip->getTrgt(), rankList ); - PRDF_ASSERT( !rankList.empty() ); // target configured with no ranks + std::vector<MemRank> rankList; + getSlaveRanks<TYPE_OCMB_CHIP>( i_chip->getTrgt(), rankList ); + PRDF_ASSERT( !rankList.empty() ); // target configured with no ranks - for ( auto & rank : rankList ) - { - iv_list.push_back( TdRankListEntry(memPortChip, rank) ); - } + for ( auto & rank : rankList ) + { + iv_list.push_back( TdRankListEntry(i_chip, rank) ); } } diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C index de3e62e23..64eb74648 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C @@ -5,7 
+5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -36,6 +36,8 @@ #include <prdfP9McaExtraSig.H> #include <prdfPlatServices.H> +#include <hwp_wrappers.H> + using namespace TARGETING; namespace PRDF @@ -125,6 +127,12 @@ bool __iueCheck<TYPE_MCA>( uint32_t i_eccAttns ) } template<> inline +bool __iueCheck<TYPE_OCMB_CHIP>( uint32_t i_eccAttns ) +{ + return ( 0 != (i_eccAttns & MAINT_IUE) ); +} + +template<> inline bool __iueCheck<TYPE_MBA>( uint32_t i_eccAttns ) { // IUES are reported via RCE ETE on Centaur @@ -252,13 +260,15 @@ uint32_t TpsEvent<TYPE_MCA>::startCmd() uint32_t o_rc = SUCCESS; + #ifndef CONFIG_AXONE + // We don't need to set any stop-on-error conditions or thresholds for // soft/inter/hard CEs during Memory Diagnostics. The design is to let the // command continue to the end of the rank and we do diagnostics on the // CE counts found in the per-symbol counters. Therefore, all we need to do // is tell the hardware which CE types to count. - mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<mss::mc_type::NIMBUS> stopCond; switch ( iv_phase ) { @@ -284,6 +294,8 @@ uint32_t TpsEvent<TYPE_MCA>::startCmd() iv_chip->getHuid(), getKey() ); } + #endif + return o_rc; #undef PRDF_FUNC @@ -362,11 +374,66 @@ uint32_t TpsEvent<TYPE_MBA>::startCmd() #undef PRDF_FUNC } +//############################################################################## +// +// Specializations for OCMB +// +//############################################################################## + +template<> +uint32_t TpsEvent<TYPE_OCMB_CHIP>::startCmd() +{ + #define PRDF_FUNC "[TpsEvent::startCmd] " + + uint32_t o_rc = SUCCESS; + + #ifdef CONFIG_AXONE + + // We don't need to set any stop-on-error conditions or thresholds for + // soft/inter/hard CEs during Memory Diagnostics. 
The design is to let the + // command continue to the end of the rank and we do diagnostics on the + // CE counts found in the per-symbol counters. Therefore, all we need to do + // is tell the hardware which CE types to count. + + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; + + switch ( iv_phase ) + { + case TD_PHASE_1: + // Set the per symbol counters to count only soft/inter CEs. + stopCond.set_nce_soft_symbol_count_enable( mss::ON); + stopCond.set_nce_inter_symbol_count_enable(mss::ON); + break; + + case TD_PHASE_2: + // Set the per symbol counters to count only hard CEs. + stopCond.set_nce_hard_symbol_count_enable(mss::ON); + break; + + default: PRDF_ASSERT( false ); // invalid phase + } + + // Start the time based scrub procedure on this slave rank. + o_rc = startTdScrub<TYPE_OCMB_CHIP>(iv_chip, iv_rank, SLAVE_RANK, stopCond); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } + + #endif + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ // Avoid linker errors with the template. 
template class TpsEvent<TYPE_MCA>; template class TpsEvent<TYPE_MBA>; +template class TpsEvent<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C index 187b9b28d..8b3b220c6 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C @@ -37,6 +37,8 @@ #include <prdfP9McaExtraSig.H> #include <prdfTargetServices.H> +#include <hwp_wrappers.H> + using namespace TARGETING; namespace PRDF @@ -54,6 +56,13 @@ static const char *mcbCeStatReg[CE_REGS_PER_PORT] = "MCB_MBSSYMEC6", "MCB_MBSSYMEC7", "MCB_MBSSYMEC8" }; +static const char *ocmbCeStatReg[CE_REGS_PER_PORT] = + { + "OCMB_MBSSYMEC0", "OCMB_MBSSYMEC1", "OCMB_MBSSYMEC2", + "OCMB_MBSSYMEC3", "OCMB_MBSSYMEC4", "OCMB_MBSSYMEC5", + "OCMB_MBSSYMEC6", "OCMB_MBSSYMEC7", "OCMB_MBSSYMEC8" + }; + //------------------------------------------------------------------------------ template <TARGETING::TYPE T> @@ -66,6 +75,13 @@ TpsFalseAlarm * __getTpsFalseAlarmCounter<TYPE_MCA>( ExtensibleChip * i_chip ) } template<> +TpsFalseAlarm * __getTpsFalseAlarmCounter<TYPE_OCMB_CHIP>( + ExtensibleChip * i_chip ) +{ + return getOcmbDataBundle(i_chip)->getTpsFalseAlarmCounter(); +} + +template<> TpsFalseAlarm * __getTpsFalseAlarmCounter<TYPE_MBA>( ExtensibleChip * i_chip ) { return getMbaDataBundle(i_chip)->getTpsFalseAlarmCounter(); @@ -73,6 +89,23 @@ TpsFalseAlarm * __getTpsFalseAlarmCounter<TYPE_MBA>( ExtensibleChip * i_chip ) //------------------------------------------------------------------------------ +template <TARGETING::TYPE T> +void __maskMainlineNceTces( ExtensibleChip * i_chip ); + +template<> +void __maskMainlineNceTces<TYPE_MCA>( ExtensibleChip * i_chip ) +{ + getMcaDataBundle(i_chip)->iv_maskMainlineNceTce = true; +} + +template<> +void __maskMainlineNceTces<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +{ + 
getOcmbDataBundle(i_chip)->iv_maskMainlineNceTce = true; +} + +//------------------------------------------------------------------------------ + template<TARGETING::TYPE T> void __getNextPhase( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc, @@ -98,12 +131,7 @@ void __getNextPhase( ExtensibleChip * i_chip, const MemRank & i_rank, //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -bool __badDqCount( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_badDqCount ); - -template<> -bool __badDqCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_badDqCount ) +bool __badDqCount(MemUtils::MaintSymbols i_nibbleStats, CeCount & io_badDqCount) { bool badDqFound = false; @@ -142,11 +170,7 @@ bool __badDqCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats, template<TARGETING::TYPE T> bool __badChipCount( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_badChipCount ); - -template<> -bool __badChipCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_badChipCount ) + CeCount & io_badChipCount ) { bool badChipFound = false; uint8_t nonZeroCount = 0; @@ -191,11 +215,7 @@ bool __badChipCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats, template<TARGETING::TYPE T> void __sumAboveOneCount( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_sumAboveOneCount ); - -template<> -void __sumAboveOneCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_sumAboveOneCount ) + CeCount & io_sumAboveOneCount ) { uint8_t sum = 0; MemUtils::MaintSymbols symList; @@ -226,11 +246,7 @@ void __sumAboveOneCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats, template<TARGETING::TYPE T> void __singleSymbolCount( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_singleSymCount ); - -template<> -void __singleSymbolCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats, - CeCount & io_singleSymCount ) + CeCount & io_singleSymCount ) { uint8_t count = 
0; bool multNonZeroSyms = false; @@ -315,12 +331,12 @@ uint32_t __updateVpdSumAboveOne( CeCount i_sumAboveOneCount, //------------------------------------------------------------------------------ -template <> -uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, - STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template <TARGETING::TYPE T> +uint32_t TpsEvent<T>::analyzeEccErrors( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeEccErrors] " + #define PRDF_FUNC "[TpsEvent<T>::analyzeEccErrors] " uint32_t o_rc = SUCCESS; @@ -338,7 +354,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, // At this point we don't actually have an address for the UE. The // best we can do is get the address in which the command stopped. MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MCA>( iv_chip, addr ); + o_rc = getMemMaintAddr<T>( iv_chip, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -346,8 +362,8 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, break; } - o_rc = MemEcc::handleMemUe<TYPE_MCA>( iv_chip, addr, - UE_TABLE::SCRUB_UE, io_sc ); + o_rc = MemEcc::handleMemUe<T>( iv_chip, addr, + UE_TABLE::SCRUB_UE, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed", @@ -357,7 +373,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, // Because of the UE, any further TPS requests will likely have no // effect. So ban all subsequent requests. 
- MemDbUtils::banTps<TYPE_MCA>( iv_chip, addr.getRank() ); + MemDbUtils::banTps<T>( iv_chip, addr.getRank() ); // Abort this procedure because additional repairs will likely // not help (also avoids complication of having UE and MPE at @@ -371,7 +387,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, io_sc.service_data->setSignature( iv_chip->getHuid(), PRDFSIG_MaintIUE ); - o_rc = MemEcc::handleMemIue<TYPE_MCA>( iv_chip, iv_rank, io_sc ); + o_rc = MemEcc::handleMemIue<T>( iv_chip, iv_rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,0x%02x) failed", @@ -397,8 +413,8 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, io_sc.service_data->setSignature( iv_chip->getHuid(), PRDFSIG_MaintMPE ); - o_rc = MemEcc::handleMpe<TYPE_MCA>( iv_chip, iv_rank, - UE_TABLE::SCRUB_MPE, io_sc ); + o_rc = MemEcc::handleMpe<T>( iv_chip, iv_rank, + UE_TABLE::SCRUB_MPE, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed", @@ -419,36 +435,51 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, } +template +uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); +template +uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzeEccErrors(const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done); + //------------------------------------------------------------------------------ -template<> -uint32_t TpsEvent<TYPE_MCA>::handleFalseAlarm( STEP_CODE_DATA_STRUCT & io_sc ) +template<TARGETING::TYPE T> +uint32_t TpsEvent<T>::handleFalseAlarm( STEP_CODE_DATA_STRUCT & io_sc ) { io_sc.service_data->setSignature( iv_chip->getHuid(), PRDFSIG_TpsFalseAlarm ); // Increase false alarm counter and check threshold. 
- if ( __getTpsFalseAlarmCounter<TYPE_MCA>(iv_chip)->inc( iv_rank, io_sc) ) + if ( __getTpsFalseAlarmCounter<T>(iv_chip)->inc( iv_rank, io_sc) ) { io_sc.service_data->setSignature( iv_chip->getHuid(), PRDFSIG_TpsFalseAlarmTH ); // Permanently mask mainline NCEs and TCEs - getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); } return SUCCESS; } +template +uint32_t TpsEvent<TYPE_MCA>::handleFalseAlarm( STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t TpsEvent<TYPE_OCMB_CHIP>::handleFalseAlarm( + STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ -template<> -uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, +template<TARGETING::TYPE T> +uint32_t TpsEvent<T>::analyzeCeSymbolCounts( CeCount i_badDqCount, CeCount i_badChipCount, CeCount i_sumAboveOneCount, CeCount i_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc ) { - #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts] " + #define PRDF_FUNC "[TpsEvent<T>::analyzeCeSymbolCounts] " uint32_t o_rc = SUCCESS; @@ -457,33 +488,33 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, bool tpsFalseAlarm = false; // Get the Bad DQ Bitmap. - TargetHandle_t mcaTrgt = iv_chip->getTrgt(); + TargetHandle_t trgt = iv_chip->getTrgt(); MemDqBitmap dqBitmap; - o_rc = getBadDqBitmap( mcaTrgt, iv_rank, dqBitmap ); + o_rc = getBadDqBitmap( trgt, iv_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed", - getHuid(mcaTrgt), iv_rank.getKey() ); + getHuid(trgt), iv_rank.getKey() ); break; } // Get the symbol mark. 
MemMark symMark; - o_rc = MarkStore::readSymbolMark<TYPE_MCA>( iv_chip, iv_rank, symMark ); + o_rc = MarkStore::readSymbolMark<T>( iv_chip, iv_rank, symMark ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "readSymbolMark<TYPE_MCA>(0x%08x, 0x%02x) " + PRDF_ERR( PRDF_FUNC "readSymbolMark<T>(0x%08x, 0x%02x) " "failed", iv_chip->getHuid(), iv_rank.getKey() ); break; } // Get the chip mark. MemMark chipMark; - o_rc = MarkStore::readChipMark<TYPE_MCA>( iv_chip, iv_rank, chipMark ); + o_rc = MarkStore::readChipMark<T>( iv_chip, iv_rank, chipMark ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_MCA>(0x%08x, 0x%02x) " + PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x, 0x%02x) " "failed", iv_chip->getHuid(), iv_rank.getKey() ); break; } @@ -512,9 +543,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, // TCE. Both are still correctable after a symbol mark // is placed. // Place a symbol mark on this bad DQ. - MemMark newSymMark( mcaTrgt, iv_rank, + MemMark newSymMark( trgt, iv_rank, i_badDqCount.symList[0].symbol ); - o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip, + o_rc = MarkStore::writeSymbolMark<T>( iv_chip, iv_rank, newSymMark ); if ( SUCCESS != o_rc ) { @@ -552,7 +583,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, io_sc.service_data->setServiceCall(); // Permanently mask mainline NCEs and TCEs. - getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); } } else @@ -566,7 +597,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, else if ( 2 == i_badDqCount.count && 0 == i_badChipCount.count ) { // Permanently mask mainline NCEs and TCEs. - getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); // If the symbol mark is available. 
if ( !symMark.isValid() ) @@ -587,9 +618,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, highSym = sym; } - MemMark newSymMark( mcaTrgt, iv_rank, + MemMark newSymMark( trgt, iv_rank, highSym.symbol ); - o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip, + o_rc = MarkStore::writeSymbolMark<T>( iv_chip, iv_rank, newSymMark ); if ( SUCCESS != o_rc ) { @@ -669,10 +700,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, // This means we have only one more potential bad DQ, which // is still correctable after a chip mark is placed. // Place a chip mark on this bad chip. - MemMark newChipMark( mcaTrgt, iv_rank, + MemMark newChipMark( trgt, iv_rank, i_badChipCount.symList[0].symbol ); - o_rc = MarkStore::writeChipMark<TYPE_MCA>( iv_chip, iv_rank, - newChipMark ); + o_rc = MarkStore::writeChipMark<T>( iv_chip, iv_rank, + newChipMark ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) " @@ -708,7 +739,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, io_sc.service_data->setServiceCall(); // Permanently mask mainline NCEs and TCEs - getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); } } else @@ -731,7 +762,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, io_sc.service_data->setServiceCall(); // Permanently mask mainline NCEs and TCEs - getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); } // If the chip mark is available. if ( !chipMark.isValid() ) @@ -742,10 +773,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, // This means we have no more potential bad DQ or bad chips // since we can't correct those after chip mark is placed. // Place a chip mark on the bad chip. 
- MemMark newChipMark( mcaTrgt, iv_rank, + MemMark newChipMark( trgt, iv_rank, i_badChipCount.symList[0].symbol ); - o_rc = MarkStore::writeChipMark<TYPE_MCA>( iv_chip, iv_rank, - newChipMark ); + o_rc = MarkStore::writeChipMark<T>( iv_chip, iv_rank, + newChipMark ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) " @@ -763,8 +794,8 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, // this chip mark, we need to clear the symbol mark now // instead of at the end of the function to make room // for the additional symbol mark. - o_rc = MarkStore::clearSymbolMark<TYPE_MCA>( iv_chip, - iv_rank ); + o_rc = MarkStore::clearSymbolMark<T>( iv_chip, + iv_rank ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "MarkStore::clearSymbolMark(" @@ -810,7 +841,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, io_sc.service_data->setServiceCall(); // Permanently mask mainline NCEs and TCEs. - getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); } } // If the symbol mark is available. @@ -822,9 +853,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, // This means we have no more potential bad DQ or bad chips // since we can't correct those after symbol mark is placed. // Place a symbol mark on this bad DQ. - MemMark newSymMark( mcaTrgt, iv_rank, + MemMark newSymMark( trgt, iv_rank, i_badDqCount.symList[0].symbol ); - o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip, + o_rc = MarkStore::writeSymbolMark<T>( iv_chip, iv_rank, newSymMark ); if ( SUCCESS != o_rc ) { @@ -865,7 +896,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, io_sc.service_data->setServiceCall(); // Permanently mask mainline NCEs and TCEs. 
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); } } @@ -888,7 +919,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, io_sc.service_data->setServiceCall(); // Permanently mask mainline NCEs and TCEs. - getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true; + __maskMainlineNceTces<T>( iv_chip ); } // If analysis resulted in a false alarm. @@ -903,18 +934,18 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, } // Write any updates to VPD. - o_rc = setBadDqBitmap( mcaTrgt, iv_rank, dqBitmap ); + o_rc = setBadDqBitmap( trgt, iv_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed", - getHuid(mcaTrgt), iv_rank.getKey() ); + getHuid(trgt), iv_rank.getKey() ); break; } // We may have placed a chip mark so do any necessary cleanup. This must // be called after writing the bad DQ bitmap because the this function // will also write it if necessary. 
- o_rc = MarkStore::chipMarkCleanup<TYPE_MCA>( iv_chip, iv_rank, io_sc ); + o_rc = MarkStore::chipMarkCleanup<T>( iv_chip, iv_rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "MarkStore::chipMarkCleanup(0x%08x,0x%02x) " @@ -929,6 +960,15 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, #undef PRDF_FUNC } +template +uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount, + CeCount i_badChipCount, CeCount i_sumAboveOneCount, + CeCount i_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzeCeSymbolCounts( CeCount i_badDqCount, + CeCount i_badChipCount, CeCount i_sumAboveOneCount, + CeCount i_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ template<> @@ -1031,11 +1071,110 @@ uint32_t TpsEvent<TYPE_MCA>::getSymbolCeCounts( CeCount & io_badDqCount, //------------------------------------------------------------------------------ -template <> -uint32_t TpsEvent<TYPE_MCA>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<> +uint32_t TpsEvent<TYPE_OCMB_CHIP>::getSymbolCeCounts( CeCount & io_badDqCount, + CeCount & io_badChipCount, CeCount & io_sumAboveOneCount, + CeCount & io_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[TpsEvent<TYPE_OCMB_CHIP>::getSymbolCeCounts] " + + uint32_t o_rc = SUCCESS; + + do + { + // Get the Bad DQ Bitmap. 
+ TargetHandle_t ocmbTrgt = iv_chip->getTrgt(); + MemDqBitmap dqBitmap; + + o_rc = getBadDqBitmap( ocmbTrgt, iv_rank, dqBitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x,%d) failed", + getHuid(ocmbTrgt), iv_rank.getMaster() ); + break; + } + std::vector<MemSymbol> bmSymList = dqBitmap.getSymbolList(); + + const char * reg_str = nullptr; + SCAN_COMM_REGISTER_CLASS * reg = nullptr; + + for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_PORT; regIdx++ ) + { + reg_str = ocmbCeStatReg[regIdx]; + reg = iv_chip->getRegister( reg_str ); + + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on %s.", reg_str ); + break; + } + uint8_t baseSymbol = SYMBOLS_PER_CE_REG * regIdx; + + for ( uint8_t i = 0; i < SYMBOLS_PER_CE_REG; + i += MEM_SYMBOLS_PER_NIBBLE ) + { + MemUtils::MaintSymbols nibbleStats; + + // Get a nibble's worth of symbols. + for ( uint8_t n = 0; n < MEM_SYMBOLS_PER_NIBBLE; n++ ) + { + uint8_t sym = baseSymbol + (i+n); + PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); + + MemUtils::SymbolData symData; + symData.symbol = MemSymbol::fromSymbol( ocmbTrgt, iv_rank, + sym, CEN_SYMBOL::ODD_SYMBOL_DQ ); + if ( !symData.symbol.isValid() ) + { + PRDF_ERR( PRDF_FUNC "MemSymbol() failed: symbol=%d", + sym ); + o_rc = FAIL; + break; + } + + // Any symbol set in the DRAM repairs VPD will have an + // automatic CE count of 0xFF + if ( std::find( bmSymList.begin(), bmSymList.end(), + symData.symbol ) != bmSymList.end() ) + symData.count = 0xFF; + else + symData.count = reg->GetBitFieldJustified(((i+n)*8), 8); + + nibbleStats.push_back( symData ); + + // Add all symbols with non-zero counts to the callout list. + if ( symData.count != 0 ) + { + MemoryMru mm { ocmbTrgt, iv_rank, symData.symbol }; + io_sc.service_data->SetCallout( mm ); + } + } + if ( SUCCESS != o_rc ) break; + + // Analyze the nibble of symbols. 
+ __analyzeNibbleSyms<TYPE_OCMB_CHIP>( nibbleStats, io_badDqCount, + io_badChipCount, io_sumAboveOneCount, io_singleSymCount ); + + } + if ( SUCCESS != o_rc ) break; + } + + }while(0); + + return o_rc; + + #undef PRDF_FUNC + +} + +//------------------------------------------------------------------------------ + +template <TARGETING::TYPE T> +uint32_t TpsEvent<T>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeCeStats] " + #define PRDF_FUNC "[TpsEvent<T>::analyzeCeStats] " uint32_t o_rc = SUCCESS; @@ -1086,11 +1225,18 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc, } +template +uint32_t TpsEvent<TYPE_MCA>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); +template +uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzeCeStats(STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done); + //------------------------------------------------------------------------------ -template<> -uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<TARGETING::TYPE T> +uint32_t TpsEvent<T>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { #define PRDF_FUNC "[TpsEvent::analyzePhase] " @@ -1102,11 +1248,11 @@ uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, // Analyze Ecc Attentions uint32_t eccAttns; - o_rc = checkEccFirs<TYPE_MCA>( iv_chip, eccAttns ); + o_rc = checkEccFirs<T>( iv_chip, eccAttns ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "checkEccFirs(0x%08x) failed", - iv_chip->getHuid() ); + iv_chip->getHuid() ); break; } @@ -1135,7 +1281,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, if ( (SUCCESS == o_rc) && o_done ) { // Clear the ECC FFDC for this master rank. 
- MemDbUtils::resetEccFfdc<TYPE_MCA>( iv_chip, iv_rank, SLAVE_RANK ); + MemDbUtils::resetEccFfdc<T>( iv_chip, iv_rank, SLAVE_RANK ); } return o_rc; @@ -1143,6 +1289,36 @@ uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, #undef PRDF_FUNC } +template +uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); +template +uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +uint32_t TpsEvent<T>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) +{ + uint32_t signature = 0; + + __getNextPhase<T>( iv_chip, iv_rank, io_sc, iv_phase, signature ); + + PRDF_TRAC( "[TpsEvent] Starting TPS Phase %d: 0x%08x,0x%02x", + iv_phase, iv_chip->getHuid(), getKey() ); + + io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature ); + + return startCmd(); +} + +template +uint32_t TpsEvent<TYPE_MCA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t TpsEvent<TYPE_OCMB_CHIP>::startNextPhase( + STEP_CODE_DATA_STRUCT & io_sc ); + //############################################################################## // // Specializations for MCA @@ -1156,13 +1332,15 @@ uint32_t TpsEvent<TYPE_MCA>::startCmd() uint32_t o_rc = SUCCESS; + #ifndef CONFIG_AXONE + // We don't need to set any stop-on-error conditions or thresholds for // soft/inter/hard CEs at runtime. The design is to let the command continue // to the end of the rank and we do diagnostics on the CE counts found in // the per-symbol counters. Therefore, all we need to do is tell the // hardware which CE types to count. 
- mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<mss::mc_type::NIMBUS> stopCond; switch ( iv_phase ) { @@ -1190,26 +1368,67 @@ uint32_t TpsEvent<TYPE_MCA>::startCmd() iv_chip->getHuid(), getKey() ); } + #endif + return o_rc; #undef PRDF_FUNC } -//------------------------------------------------------------------------------ +//############################################################################## +// +// Specializations for OCMB +// +//############################################################################## template<> -uint32_t TpsEvent<TYPE_MCA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) +uint32_t TpsEvent<TYPE_OCMB_CHIP>::startCmd() { - uint32_t signature = 0; + #define PRDF_FUNC "[TpsEvent::startCmd] " - __getNextPhase<TYPE_MCA>( iv_chip, iv_rank, io_sc, iv_phase, signature ); + uint32_t o_rc = SUCCESS; - PRDF_TRAC( "[TpsEvent] Starting TPS Phase %d: 0x%08x,0x%02x", - iv_phase, iv_chip->getHuid(), getKey() ); + #ifdef CONFIG_AXONE - io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature ); + // We don't need to set any stop-on-error conditions or thresholds for + // soft/inter/hard CEs at runtime. The design is to let the command continue + // to the end of the rank and we do diagnostics on the CE counts found in + // the per-symbol counters. Therefore, all we need to do is tell the + // hardware which CE types to count. - return startCmd(); + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; + + switch ( iv_phase ) + { + case TD_PHASE_1: + // Set the per symbol counters to count only hard CEs. + stopCond.set_nce_hard_symbol_count_enable(mss::ON); + break; + + case TD_PHASE_2: + // Since there are not enough hard CEs to trigger a symbol mark, set + // the per symbol counters to count all CE types. 
+ stopCond.set_nce_soft_symbol_count_enable( mss::ON); + stopCond.set_nce_inter_symbol_count_enable(mss::ON); + stopCond.set_nce_hard_symbol_count_enable( mss::ON); + break; + + default: PRDF_ASSERT( false ); // invalid phase + } + + // Start the time based scrub procedure on this slave rank. + o_rc = startTdScrub<TYPE_OCMB_CHIP>(iv_chip, iv_rank, SLAVE_RANK, stopCond); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } + + #endif + + return o_rc; + + #undef PRDF_FUNC } //############################################################################## diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C index 8c3c4480a..784306baf 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -30,6 +30,8 @@ // Platform includes #include <prdfCenMbaExtraSig.H> +#include <hwp_wrappers.H> + using namespace TARGETING; namespace PRDF @@ -39,41 +41,16 @@ using namespace PlatServices; //############################################################################## // -// Specializations for MCA +// Generic Specializations // //############################################################################## -template<> -uint32_t VcmEvent<TYPE_MCA>::startCmd() +template<TARGETING::TYPE T> +uint32_t VcmEvent<T>::handlePhaseComplete( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[VcmEvent::startCmd] " - - uint32_t o_rc = SUCCESS; - - // No stop conditions. - mss::mcbist::stop_conditions stopCond; - - // Start the time based scrub procedure on this master rank. 
- o_rc = startTdScrub<TYPE_MCA>( iv_chip, iv_rank, MASTER_RANK, stopCond ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", - iv_chip->getHuid(), getKey() ); - } - - return o_rc; - - #undef PRDF_FUNC -} - -//------------------------------------------------------------------------------ - -template<> -uint32_t VcmEvent<TYPE_MCA>::handlePhaseComplete( const uint32_t & i_eccAttns, - STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) -{ - #define PRDF_FUNC "[VcmEvent<TYPE_MCA>::handlePhaseComplete] " + #define PRDF_FUNC "[VcmEvent<T>::handlePhaseComplete] " uint32_t o_rc = SUCCESS; @@ -100,6 +77,49 @@ uint32_t VcmEvent<TYPE_MCA>::handlePhaseComplete( const uint32_t & i_eccAttns, #undef PRDF_FUNC } +template +uint32_t VcmEvent<TYPE_MCA>::handlePhaseComplete( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); +template +uint32_t VcmEvent<TYPE_OCMB_CHIP>::handlePhaseComplete( + const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); + +//############################################################################## +// +// Specializations for MCA +// +//############################################################################## + +template<> +uint32_t VcmEvent<TYPE_MCA>::startCmd() +{ + #define PRDF_FUNC "[VcmEvent::startCmd] " + + uint32_t o_rc = SUCCESS; + + #ifndef CONFIG_AXONE + + // No stop conditions. + mss::mcbist::stop_conditions<mss::mc_type::NIMBUS> stopCond; + + // Start the time based scrub procedure on this master rank. 
+ o_rc = startTdScrub<TYPE_MCA>( iv_chip, iv_rank, MASTER_RANK, stopCond ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } + + #endif + + return o_rc; + + #undef PRDF_FUNC +} + //############################################################################## // // Specializations for MBA @@ -448,6 +468,40 @@ uint32_t VcmEvent<TYPE_MBA>::handlePhaseComplete( const uint32_t & i_eccAttns, #undef PRDF_FUNC } +//############################################################################## +// +// Specializations for OCMB +// +//############################################################################## + +template<> +uint32_t VcmEvent<TYPE_OCMB_CHIP>::startCmd() +{ + #define PRDF_FUNC "[VcmEvent::startCmd] " + + uint32_t o_rc = SUCCESS; + + #ifdef CONFIG_AXONE + + // No stop conditions. + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; + + // Start the time based scrub procedure on this master rank. + o_rc = startTdScrub<TYPE_OCMB_CHIP>( iv_chip, iv_rank, MASTER_RANK, + stopCond ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } + + #endif + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H index b319f910b..c712d6aa3 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -342,6 +342,9 @@ class VcmEvent : public TdEntry #ifdef __HOSTBOOT_RUNTIME template<> uint32_t VcmEvent<TARGETING::TYPE_MCA>::cleanup(STEP_CODE_DATA_STRUCT & io_sc); +template<> +uint32_t VcmEvent<TARGETING::TYPE_OCMB_CHIP>::cleanup( + STEP_CODE_DATA_STRUCT & io_sc); #endif template<> diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C index 26ef1d727..5ffa9a84b 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -92,6 +92,12 @@ bool __iueCheck<TYPE_MCA>( uint32_t i_eccAttns ) } template<> inline +bool __iueCheck<TYPE_OCMB_CHIP>( uint32_t i_eccAttns ) +{ + return ( 0 != (i_eccAttns & MAINT_IUE) ); +} + +template<> inline bool __iueCheck<TYPE_MBA>( uint32_t i_eccAttns ) { // IUES are reported via RCE ETE on Centaur @@ -218,6 +224,7 @@ uint32_t VcmEvent<TYPE_MBA>::startCmd() // Avoid linker errors with the template. template class VcmEvent<TYPE_MCA>; template class VcmEvent<TYPE_MBA>; +template class VcmEvent<TYPE_OCMB_CHIP>; } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C index ca4de8e5a..e64227996 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -55,6 +55,12 @@ VcmFalseAlarm * __getFalseAlarmCounter<TYPE_MCA>( ExtensibleChip * i_chip ) } template<> +VcmFalseAlarm * __getFalseAlarmCounter<TYPE_OCMB_CHIP>(ExtensibleChip * i_chip) +{ + return getOcmbDataBundle(i_chip)->getVcmFalseAlarmCounter(); +} + +template<> VcmFalseAlarm * __getFalseAlarmCounter<TYPE_MBA>( ExtensibleChip * i_chip ) { return getMbaDataBundle(i_chip)->getVcmFalseAlarmCounter(); @@ -62,16 +68,16 @@ VcmFalseAlarm * __getFalseAlarmCounter<TYPE_MBA>( ExtensibleChip * i_chip ) //############################################################################## // -// Specializations for MCA +// Generic Specializations // //############################################################################## -template<> -uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns, - STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<TARGETING::TYPE T> +uint32_t VcmEvent<T>::checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[VcmEvent<TYPE_MCA>::checkEcc] " + #define PRDF_FUNC "[VcmEvent<T>::checkEcc] " uint32_t o_rc = SUCCESS; @@ -88,7 +94,7 @@ uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns, // At this point we don't actually have an address for the UE. The // best we can do is get the address in which the command stopped. MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MCA>( iv_chip, addr ); + o_rc = getMemMaintAddr<T>( iv_chip, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -96,7 +102,7 @@ uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns, break; } - o_rc = MemEcc::handleMemUe<TYPE_MCA>( iv_chip, addr, + o_rc = MemEcc::handleMemUe<T>( iv_chip, addr, UE_TABLE::SCRUB_UE, io_sc ); if ( SUCCESS != o_rc ) { @@ -107,7 +113,7 @@ uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns, // Because of the UE, any further TPS requests will likely have no // effect. 
So ban all subsequent requests. - MemDbUtils::banTps<TYPE_MCA>( iv_chip, addr.getRank() ); + MemDbUtils::banTps<T>( iv_chip, addr.getRank() ); // Leave the mark in place and abort this procedure. o_done = true; break; @@ -118,7 +124,7 @@ uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns, io_sc.service_data->setSignature( iv_chip->getHuid(), PRDFSIG_MaintIUE ); - o_rc = MemEcc::handleMemIue<TYPE_MCA>( iv_chip, iv_rank, io_sc ); + o_rc = MemEcc::handleMemIue<T>( iv_chip, iv_rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,0x%02x) failed", @@ -143,6 +149,14 @@ uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns, #undef PRDF_FUNC } +template +uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); +template +uint32_t VcmEvent<TYPE_OCMB_CHIP>::checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); //------------------------------------------------------------------------------ @@ -180,6 +194,41 @@ uint32_t VcmEvent<TYPE_MCA>::cleanup( STEP_CODE_DATA_STRUCT & io_sc ) #undef PRDF_FUNC } +template<> +uint32_t VcmEvent<TYPE_OCMB_CHIP>::cleanup( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[VcmEvent::cleanup] " + + uint32_t o_rc = SUCCESS; + + do + { + o_rc = MarkStore::chipMarkCleanup<TYPE_OCMB_CHIP>( iv_chip, iv_rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "chipMarkCleanup(0x%08x,0x%02x) failed", + iv_chip->getHuid(), iv_rank.getKey() ); + break; + } + + // The cleanup() function is called by both verified() and falseAlarm(). + // In either case, the error log should be predictive if there has been + // a least one false alarm on any DRAM on this rank other than this + // DRAM. This is required on Nimbus because of two symbol correction, + // which does not exist on Centaur. 
+ VcmFalseAlarm * faCntr =__getFalseAlarmCounter<TYPE_OCMB_CHIP>(iv_chip); + uint8_t dram = iv_mark.getSymbol().getDram(); + if ( faCntr->queryDrams(iv_rank, dram, io_sc) ) + io_sc.service_data->setServiceCall(); + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + //############################################################################## // // Specializations for MBA @@ -386,6 +435,7 @@ uint32_t VcmEvent<T>::falseAlarm( STEP_CODE_DATA_STRUCT & io_sc ) // Avoid linker errors with the template. template class VcmEvent<TYPE_MCA>; template class VcmEvent<TYPE_MBA>; +template class VcmEvent<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C index 5f7efa274..fac29fce3 100644 --- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C +++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C @@ -27,7 +27,6 @@ #include <iipServiceDataCollector.h> #include <prdfExtensibleChip.H> #include <prdfPluginMap.H> -#include <isteps/nvdimm/nvdimm.H> // Platform includes #include <prdfMemDbUtils.H> @@ -38,6 +37,10 @@ #include <prdfMemTps.H> #endif +#ifdef CONFIG_NVDIMM + #include <nvdimm.H> +#endif + using namespace TARGETING; namespace PRDF @@ -296,18 +299,9 @@ PRDF_PLUGIN_DEFINE( nimbus_mca, MemPortFailure ); // //############################################################################## +#ifdef CONFIG_NVDIMM #ifdef __HOSTBOOT_RUNTIME -enum nvdimmRegOffset -{ - NVDIMM_MGT_CMD1 = 0x041, - MODULE_HEALTH = 0x0A0, - MODULE_HEALTH_STATUS0 = 0x0A1, - MODULE_HEALTH_STATUS1 = 0x0A2, - ERROR_THRESHOLD_STATUS = 0x0A5, - WARNING_THRESHOLD_STATUS = 0x0A7, -}; - /** * @brief Gets a map list of which bits are set from a uint8_t bit list (7:0) * @param i_data uint8_t bit list (7:0) @@ -349,6 +343,7 @@ uint32_t __addBpmCallout( TargetHandle_t i_dimm, break; } + // addPartCallout will default to GARD_NULL, NO_DECONFIG mainErrl->addPartCallout( i_dimm, HWAS::BPM_PART_TYPE, 
i_priority ); @@ -362,10 +357,12 @@ uint32_t __addBpmCallout( TargetHandle_t i_dimm, /** * @brief Adds a callout of the cable connecting an NVDIMM to its * backup power module (BPM) + * @param i_dimm The target dimm. * @param i_priority The callout priority. * @return FAIL if unable to get the global error log, else SUCCESS */ -uint32_t __addNvdimmCableCallout( HWAS::callOutPriority i_priority ) +uint32_t __addNvdimmCableCallout( TargetHandle_t i_dimm, + HWAS::callOutPriority i_priority ) { #define PRDF_FUNC "[__addNvdimmCableCallout] " @@ -382,7 +379,9 @@ uint32_t __addNvdimmCableCallout( HWAS::callOutPriority i_priority ) break; } - mainErrl->addProcedureCallout( HWAS::EPUB_PRC_NVDIMM_ERR, i_priority ); + // addPartCallout will default to GARD_NULL, NO_DECONFIG + mainErrl->addPartCallout( i_dimm, HWAS::BPM_CABLE_PART_TYPE, + i_priority ); }while(0); @@ -391,21 +390,45 @@ uint32_t __addNvdimmCableCallout( HWAS::callOutPriority i_priority ) #undef PRDF_FUNC } +/** + * @brief If a previous error has been found, add a signature to the + * multi-signature list, else set the primary signature. + * @param io_sc The step code data struct. + * @param i_trgt The target. + * @param i_errFound Whether an error has already been found or not. + * @param i_sig The signature to be set. + */ +void __addSignature( STEP_CODE_DATA_STRUCT & io_sc, TargetHandle_t i_trgt, + bool i_errFound, uint32_t i_sig ) +{ + if ( i_errFound ) + { + io_sc.service_data->AddSignatureList( i_trgt, i_sig ); + } + else + { + io_sc.service_data->setSignature( getHuid(i_trgt), i_sig ); + } +} /** * @brief Analyze NVDIMM Health Status0 Register for errors - * @param io_sc The step code data struct. - * @param i_dimm The target dimm. + * @param io_sc The step code data struct. + * @param i_dimm The target dimm. + * @param io_errFound Whether an error has already been found or not. 
* @return FAIL if unable to read register, else SUCCESS */ -uint32_t __analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc, - TargetHandle_t i_dimm ) +uint32_t __analyzeHealthStatus0Reg(STEP_CODE_DATA_STRUCT & io_sc, + TargetHandle_t i_dimm, bool & io_errFound) { #define PRDF_FUNC "[__analyzeHealthStatus0Reg] " uint32_t o_rc = SUCCESS; uint8_t data = 0; + // Get MCA, for signatures + TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA ); + do { // NVDIMM health status registers size = 1 byte @@ -413,7 +436,7 @@ uint32_t __analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc, // Read the Health Status0 Register (0xA1) 7:0 errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE, - DEVICE_NVDIMM_ADDRESS(MODULE_HEALTH_STATUS0) ); + DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::MODULE_HEALTH_STATUS0) ); if ( errl ) { PRDF_ERR( PRDF_FUNC "Failed to read Health Status0 Register. " @@ -427,58 +450,66 @@ uint32_t __analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc, // BIT 0: Voltage Regulator Fail if ( bitList.count(0) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VoltRegFail ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_VoltRegFail ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 1: VDD Lost if ( bitList.count(1) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VddLost ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_VddLost ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 2: VPP Lost if ( bitList.count(2) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VppLost ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_VppLost ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + 
io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 3: VTT Lost if ( bitList.count(3) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VttLost ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_VttLost ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 4: DRAM not Self Refresh if ( bitList.count(4) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_NotSelfRefr ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NotSelfRefr ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 5: Controller HW Error if ( bitList.count(5) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_CtrlHwErr ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_CtrlHwErr ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 6: NVM Controller Error if ( bitList.count(6) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_NvmCtrlErr ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmCtrlErr ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 7: NVM Lifetime Error if ( bitList.count(7) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_NvmLifeErr ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmLifeErr ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } }while(0); @@ -491,18 +522,22 @@ uint32_t 
__analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc, /** * @brief Analyze NVDIMM Health Status1 Register for errors - * @param io_sc The step code data struct. - * @param i_dimm The target dimm. + * @param io_sc The step code data struct. + * @param i_dimm The target dimm. + * @param io_errFound Whether an error has already been found or not. * @return FAIL if unable to read register, else SUCCESS */ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc, - TargetHandle_t i_dimm ) + TargetHandle_t i_dimm, bool & io_errFound ) { #define PRDF_FUNC "[__analyzeHealthStatus1Reg] " uint32_t o_rc = SUCCESS; uint8_t data = 0; + // Get MCA, for signatures + TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA ); + do { // NVDIMM health status registers size = 1 byte @@ -510,7 +545,7 @@ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc, // Read the Health Status1 Register (0xA2) 7:0 errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE, - DEVICE_NVDIMM_ADDRESS(MODULE_HEALTH_STATUS1) ); + DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::MODULE_HEALTH_STATUS1) ); if ( errl ) { PRDF_ERR( PRDF_FUNC "Failed to read Health Status1 Register. 
" @@ -524,83 +559,90 @@ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc, // BIT 0: Insufficient Energy if ( bitList.count(0) ) { - io_sc.service_data->AddSignatureList(i_dimm, PRDFSIG_InsuffEnergy); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_InsuffEnergy ); // Callout BPM (backup power module) high, cable high o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; - o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH ); + o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; // Callout NVDIMM low, no gard io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + io_errFound = true; } // BIT 1: Invalid Firmware if ( bitList.count(1) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_InvFwErr ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_InvFwErr ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 2: Configuration Data Error if ( bitList.count(2) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_CnfgDataErr ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_CnfgDataErr ); // Callout NVDIMM on 1st, no gard - io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD ); + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + io_errFound = true; } // BIT 3: No Energy Source if ( bitList.count(3) ) { - io_sc.service_data->AddSignatureList(i_dimm, PRDFSIG_NoEsPres); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NoEsPres ); // Callout BPM (backup power module) high, cable high o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; - o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH ); + o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; // Callout NVDIMM low, no gard 
io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + io_errFound = true; } // BIT 4: Energy Policy Not Set if ( bitList.count(4) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_EsPolNotSet ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsPolNotSet ); // Callout FW (Level2 Support) High io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_HIGH, NO_GARD ); // Callout NVDIMM low on 1st, no gard io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + io_errFound = true; } // BIT 5: Energy Source HW Error if ( bitList.count(5) ) { - io_sc.service_data->AddSignatureList ( i_dimm, PRDFSIG_EsHwFail ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsHwFail ); // Callout BPM (backup power module) high, cable high o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; - o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH ); + o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; // Callout NVDIMM low, no gard io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + io_errFound = true; } // BIT 6: Energy Source Health Assessment Error if ( bitList.count(6) ) { - io_sc.service_data->AddSignatureList(i_dimm, PRDFSIG_EsHlthAssess); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsHlthAssess); // Callout BPM (backup power module) high, cable high o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; - o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH ); + o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != o_rc ) break; // Callout NVDIMM low, no gard io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + io_errFound = true; } // BIT 7: Reserved @@ -613,18 +655,105 @@ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc, } /** + * @brief Reads and merges the data from two ES_TEMP registers to get the + * correct temperature format. 
+ * @param i_dimm The target nvdimm. + * @param i_tempMsbReg The address of the register that contains the most + * significant byte of the temperature data. + * @param i_tempLsbReg The address of the register that contains the least + * significant byte of the temperature data. + * @param o_tempData The 16 bit temperature data. + * @return FAIL if unable to read register, else SUCCESS + */ +uint32_t __readTemp( TargetHandle_t i_dimm, uint16_t i_tempMsbReg, + uint16_t i_tempLsbReg, uint16_t & o_tempData ) +{ + #define PRDF_FUNC "[__readTemp] " + + /* + * -NOTE: Example showing how to read the temperature format: + * ES_TEMP1 = 0x03 (MSB: bits 15-8) + * ES_TEMP0 = 0x48 (LSB: bits 7-0) + * + * 0x0348 = 0000 0011 0100 1000 = 52.5 C + * + * -NOTE: bit definition: + * [15:13]Reserved + * [12]Sign 0 = positive, 1 = negative; 0°C should be expressed as positive + * [11] 128°C + * [10] 64°C + * [9] 32°C + * [8] 16°C + * [7] 8°C + * [6] 4°C + * [5] 2°C + * [4] 1°C + * [3] 0.5°C + * [2] 0.25°C + * [1] 0.125°C Optional for temp fields; not used for temp th fields + * [0]0.0625°C Optional for temp fields; not used for temp th fields + */ + uint32_t o_rc = SUCCESS; + + do + { + // NVDIMM health status registers size = 1 byte + size_t NVDIMM_SIZE = 1; + uint8_t msbData = 0; + uint8_t lsbData = 0; + + // Read the two inputted temperature registers. + errlHndl_t errl = deviceRead( i_dimm, &msbData, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(i_tempMsbReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read ES Temperature MSB Register. " + "HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + errl = deviceRead( i_dimm, &lsbData, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(i_tempLsbReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read ES Temperature LSB Register. 
" + "HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + o_tempData = ((uint16_t)msbData << 8) | lsbData; + + }while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +/** * @brief Analyze NVDIMM Error Threshold Status Register for errors - * @param io_sc The step code data struct. - * @param i_dimm The target dimm. + * @param io_sc The step code data struct. + * @param i_dimm The target dimm. + * @param io_errFound Whether an error has already been found or not. + * @param o_esTempErr A flag for whether we hit an ES TEMP error or not. * @return FAIL if unable to read register, else SUCCESS */ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc, - TargetHandle_t i_dimm ) + TargetHandle_t i_dimm, bool & io_errFound, + bool & o_esTempErr ) { #define PRDF_FUNC "[__analyzeErrorThrStatusReg] " uint32_t o_rc = SUCCESS; uint8_t data = 0; + o_esTempErr = false; + + // Get MCA, for signatures + TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA ); do { @@ -633,7 +762,7 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc, // Read the Error Threshold Status Register (0xA5) 7:0 errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE, - DEVICE_NVDIMM_ADDRESS(ERROR_THRESHOLD_STATUS) ); + DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::ERROR_THRESHOLD_STATUS) ); if ( errl ) { PRDF_ERR( PRDF_FUNC "Failed to read Error Threshold Status Reg. 
" @@ -648,7 +777,7 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc, // BIT 1: ES Lifetime Error if ( bitList.count(1) ) { - io_sc.service_data->AddSignatureList ( i_dimm, PRDFSIG_EsLifeErr ); + __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsLifeErr ); // Callout BPM (backup power module) high o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); @@ -656,11 +785,60 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc, // Callout NVDIMM low, no gard io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + io_errFound = true; } // BIT 2: ES Temperature Error if ( bitList.count(2) ) { - io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_EsTmpErr ); + // Sleep two seconds to avoid exiting PRD analysis faster than the + // ES_TEMP sample rate. + PlatServices::milliSleep( 2, 0 ); + + // Read the ES_TEMP and ES_TEMP_ERROR_HIGH_THRESHOLD values + uint16_t msbEsTempReg = NVDIMM::i2cReg::ES_TEMP1; + uint16_t lsbEsTempReg = NVDIMM::i2cReg::ES_TEMP0; + uint16_t esTemp = 0; + o_rc = __readTemp( i_dimm, msbEsTempReg, lsbEsTempReg, esTemp ); + if ( SUCCESS != o_rc ) break; + + uint16_t msbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_HIGH_THRESHOLD1; + uint16_t lsbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_HIGH_THRESHOLD0; + uint16_t esTempHighTh = 0; + o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempHighTh ); + if ( SUCCESS != o_rc ) break; + + msbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_LOW_THRESHOLD1; + lsbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_LOW_THRESHOLD0; + uint16_t esTempLowTh = 0; + o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempLowTh ); + if ( SUCCESS != o_rc ) break; + + // Check to see if the ES_TEMP is negative (bit 12) + bool esTempNeg = false; + if ( esTemp & 0x1000 ) esTempNeg = true; + + // If ES_TEMP is equal or above ES_TEMP_ERROR_HIGH_THRESHOLD + // Just in case ES_TEMP has moved before we read it out, we'll add + // a 2°C margin when comparing to the threshold. 
+ if ( (esTemp >= (esTempHighTh - 0x0020)) && !esTempNeg ) + { + __addSignature( io_sc, mca, io_errFound, + PRDFSIG_EsTmpErrHigh ); + } + // Else check if the error hit the low threshold, again with the + // same 2°C margin. + else if ( (esTemp <= (esTempLowTh + 0x0020)) || esTempNeg ) + { + __addSignature( io_sc, mca, io_errFound, + PRDFSIG_EsTmpErrLow ); + } + // Else the temperature must have gone back to a normal value, so + // we will label this as a false alarm case. + else + { + __addSignature( io_sc, mca, io_errFound, + PRDFSIG_EsTmpErrFa ); + } // Callout BPM (backup power module) high o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); @@ -668,6 +846,9 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc, // Callout NVDIMM low, no gard io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + + o_esTempErr = true; + io_errFound = true; } // BIT 3:7: Reserved @@ -680,6 +861,419 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc, } /** + * @brief Adjusts the warning threshold so that future warnings are allowed + * to report. + * @param io_sc The step code data struct. + * @param i_dimm The target nvdimm. + * @param i_warnThReg The address of the relevant warning threshold register. + * @param i_errThReg The address of the relevant error threshold register. + * @param o_firstWarn Flag if this is the first warning of this type. + * @param o_statusErr Flag to tell if we found an error from checking the + * notification status register. 
+ * @return FAIL if unable to read register, else SUCCESS + */ +uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc, + TargetHandle_t i_dimm, uint16_t i_warnThReg, + uint16_t i_errThReg, bool & o_firstWarn, + bool & o_statusErr ) +{ + #define PRDF_FUNC "[__adjustThreshold] " + + uint32_t o_rc = SUCCESS; + uint16_t notifCmdReg = NVDIMM::i2cReg::SET_EVENT_NOTIFICATION_CMD; + uint16_t notifStatusReg = NVDIMM::i2cReg::SET_EVENT_NOTIFICATION_STATUS; + o_firstWarn = false; + o_statusErr = false; + + do + { + // NVDIMM health status registers size = 1 byte + size_t NVDIMM_SIZE = 1; + + // Read the corresponding warning threshold + uint8_t warnTh = 0; + errlHndl_t errl = deviceRead( i_dimm, &warnTh, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(i_warnThReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read Warning Threshold Reg. HUID: " + "0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + // Read the corresponding error threshold + uint8_t errTh = 0; + errl = deviceRead( i_dimm, &errTh, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(i_errThReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read Error Threshold Reg. HUID: " + "0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + // If the warning threshold is not set to the error threshold+1, + // move the threshold. + if ( warnTh != (errTh+1) ) + { + o_firstWarn = true; + + // SET_EVENT_NOTIFICATION_CMD is a write only register that is + // used to change the SET_EVENT_NOTIFICATION_STATUS register. + // The only bits within it that are used are bits 0 and 1, as such + // we can safely set the rest to 0. 
It is defined as: + // [0]: Persistency Notification + // [1]: Warning Threshold Notification + // [2]: Obsolete + // [3]: Firmware Activation Notification (Not Used) + // [4:7]: Reserved + + // Clear SET_EVENT_NOTIFICATION_CMD bit 1 and keep bit 0 set + uint8_t notifCmd = 0x01; + errl = deviceWrite( i_dimm, &notifCmd, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(notifCmdReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to clear Set Event Notification " + "Cmd Reg. HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + // Check SET_EVENT_NOTIFICATION_STATUS to ensure everything is set + // as we expect and we don't see any errors. + uint8_t notifStat = 0; + errl = deviceRead( i_dimm, &notifStat, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(notifStatusReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read Set Event Notification " + "Status Reg. HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + std::map<uint8_t,bool> bitList = __nvdimmGetActiveBits( notifStat ); + + // if Bit [1]: SET_EVENT_NOTIFICATION_ERROR = 1 + // or Bit [2]: PERSISTENCY_ENABLED = 0 + // or Bit [3]: WARNING_THRESHOLD_ENABLED = 1 + if ( bitList.count(1) || !bitList.count(2) || bitList.count(3) ) + { + o_statusErr = true; + + // Make the log predictive and mask the fir + io_sc.service_data->SetThresholdMaskId(0); + + // Callout the NVDIMM, no gard + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + + // Send message to PHYP that save/restore may work + o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != o_rc ) break; + + break; + } + + + // Set the warning threshold to error threshold + 1 + warnTh = errTh+1; + errl = deviceWrite( i_dimm, &warnTh, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(i_warnThReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to write Warning Threshold Reg. 
" + "HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + // Set SET_EVENT_NOTIFICATION_CMD bit 1 and keep bit 0 set + notifCmd = 0x03; + errl = deviceWrite( i_dimm, &notifCmd, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(notifCmdReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to write Set Event Notification " + "Cmd Reg. HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + // Recheck SET_EVENT_NOTIFICATION_STATUS to ensure everything is set + // as we expect and we don't see any errors. + errl = deviceRead( i_dimm, &notifStat, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(notifStatusReg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read Set Event Notification " + "Status Reg. HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + bitList = __nvdimmGetActiveBits( notifStat ); + + // if Bit [1]: SET_EVENT_NOTIFICATION_ERROR = 1 + // or Bit [2]: PERSISTENCY_ENABLED = 0 + // or Bit [3]: WARNING_THRESHOLD_ENABLED = 0 + if ( bitList.count(1) || !bitList.count(2) || !bitList.count(3) ) + { + o_statusErr = true; + + // Make the log predictive and mask the fir + io_sc.service_data->SetThresholdMaskId(0); + + // Callout the NVDIMM, no gard + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + + // Send message to PHYP that save/restore may work + o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != o_rc ) break; + + break; + } + } + // Note: moving the threshold should clear the warning from + // WARNING_THRESHOLD_STATUS, which allows future warnings to report. + + }while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +/** + * @brief Analyze NVDIMM Warning Threshold Status Register for errors + * @param io_sc The step code data struct. + * @param i_dimm The target dimm. + * @param io_errFound Whether an error has already been found or not. 
+ * @return FAIL if unable to read register, else SUCCESS + */ +uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc, + TargetHandle_t i_dimm, bool & io_errFound) +{ + #define PRDF_FUNC "[__analyzeWarningThrStatusReg] " + + uint32_t o_rc = SUCCESS; + uint8_t data = 0; + + // Get MCA, for signatures + TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA ); + + do + { + // NVDIMM health status registers size = 1 byte + size_t NVDIMM_SIZE = 1; + + // Read the Warning Threshold Status Register (0xA7) 7:0 + errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::WARNING_THRESHOLD_STATUS) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read Warning Threshold Status Reg. " + "HUID: 0x%08x", getHuid(i_dimm) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + std::map<uint8_t,bool> bitList = __nvdimmGetActiveBits( data ); + + // Analyze Bit 2 First + // BIT 2: ES_TEMP_WARNING + if ( bitList.count(2) ) + { + // Sleep two seconds to avoid exiting PRD analysis faster than the + // ES_TEMP sample rate. 
+ PlatServices::milliSleep( 2, 0 ); + + // Read the ES_TEMP and ES_TEMP_WARNING_HIGH_THRESHOLD values + uint16_t msbEsTempReg = NVDIMM::i2cReg::ES_TEMP1; + uint16_t lsbEsTempReg = NVDIMM::i2cReg::ES_TEMP0; + uint16_t esTemp = 0; + o_rc = __readTemp( i_dimm, msbEsTempReg, lsbEsTempReg, esTemp ); + if ( SUCCESS != o_rc ) break; + + uint16_t msbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_HIGH_THRESHOLD1; + uint16_t lsbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_HIGH_THRESHOLD0; + uint16_t esTempHighTh = 0; + o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempHighTh ); + if ( SUCCESS != o_rc ) break; + + msbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_LOW_THRESHOLD1; + lsbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_LOW_THRESHOLD0; + uint16_t esTempLowTh = 0; + o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempLowTh ); + if ( SUCCESS != o_rc ) break; + + // Check to see if the ES_TEMP is negative (bit 12) + bool esTempNeg = false; + if ( esTemp & 0x1000 ) esTempNeg = true; + + // If ES_TEMP is equal or above ES_TEMP_WARNING_HIGH_THRESHOLD + // Just in case ES_TEMP has moved before we read it out, we'll add + // a 2°C margin when comparing to the threshold. + if ( (esTemp >= (esTempHighTh - 0x0020)) && !esTempNeg ) + { + __addSignature( io_sc, mca, io_errFound, + PRDFSIG_EsTmpWarnHigh ); + } + // Else check if the warning hit the low threshold, again with the + // same 2°C margin. + else if ( (esTemp <= (esTempLowTh + 0x0020)) || esTempNeg ) + { + __addSignature( io_sc, mca, io_errFound, + PRDFSIG_EsTmpWarnLow ); + } + // Else the temperature must have gone back to a normal value, so + // we will label this as a false alarm case. 
+ else + { + __addSignature( io_sc, mca, io_errFound, + PRDFSIG_EsTmpWarnFa ); + } + + // Callout BPM (backup power module) high + o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); + if ( SUCCESS != o_rc ) break; + + // Callout NVDIMM low, no gard + io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + + // Because of the possibility of intermittent ES temperature + // false alarm readings, we will keep the log hidden. If there is + // an actual ES temperature problem, we assume we will continue + // to be called to handle the temperature warning and hit threshold. + + // Only send the save/restore message to PHYP if we hit threshold. + if ( io_sc.service_data->IsAtThreshold() ) + { + // Send message to PHYP that save/restore may work + o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != o_rc ) break; + } + + io_errFound = true; + } + // BIT 0: NVM_LIFETIME_WARNING + if ( bitList.count(0) ) + { + // Adjust warning threshold. + uint16_t warnThReg = NVDIMM::i2cReg::NVM_LIFETIME_WARNING_THRESHOLD; + uint16_t errThReg = NVDIMM::i2cReg::NVM_LIFETIME_ERROR_THRESHOLD; + bool firstWarn = false; + bool statusErr = false; + o_rc = __adjustThreshold( io_sc, i_dimm, warnThReg, errThReg, + firstWarn, statusErr ); + if ( SUCCESS != o_rc ) break; + + // Make the log predictive, but do not mask the FIR + io_sc.service_data->setServiceCall(); + + // If we got a set event notification status error, add the + // signature for that before adding the signature for the warning. + // Also do not take our normal callout action since we already will + // have called out the NVDIMM because of the status error. + if ( statusErr ) + { + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NotifStatErr ); + + // Need to set io_errFound here so the warning signature is + // added to the multi-signature list instead of as the primary + // signature. 
+ io_errFound = true; + } + else + { + // Callout NVDIMM on 1st, no gard + io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD ); + } + + // Update signature depending on whether this is the first or second + // warning of this type. + if ( firstWarn ) + { + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmLifeWarn1 ); + } + else + { + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmLifeWarn2 ); + } + + + io_errFound = true; + } + // BIT 1: ES_LIFETIME_WARNING + if ( bitList.count(1) ) + { + // Adjust warning threshold. + uint16_t warnThReg = NVDIMM::i2cReg::ES_LIFETIME_WARNING_THRESHOLD; + uint16_t errThReg = NVDIMM::i2cReg::ES_LIFETIME_ERROR_THRESHOLD; + bool firstWarn = false; + bool statusErr = false; + o_rc = __adjustThreshold( io_sc, i_dimm, warnThReg, errThReg, + firstWarn, statusErr ); + if ( SUCCESS != o_rc ) break; + + // Make the log predictive, but do not mask the FIR + io_sc.service_data->setServiceCall(); + + // If we got a set event notification status error, add the + // signature for that before adding the signature for the warning. + // Also do not take our normal callout action since we already will + // have called out the NVDIMM because of the status error. + if ( statusErr ) + { + __addSignature( io_sc, mca, io_errFound, PRDFSIG_NotifStatErr ); + + // Need to set io_errFound here so the warning signature is + // added to the multi-signature list instead of as the primary + // signature. + io_errFound = true; + } + else + { + // Callout BPM (backup power module) high + o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH ); + if ( SUCCESS != o_rc ) break; + + // Callout NVDIMM low, no gard + io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD ); + } + + // Update signature depending on whether this is the first or second + // warning of this type. 
+ if ( firstWarn ) + { + __addSignature(io_sc, mca, io_errFound, PRDFSIG_EsLifeWarn1); + } + else + { + __addSignature(io_sc, mca, io_errFound, PRDFSIG_EsLifeWarn2); + } + + io_errFound = true; + } + + }while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +/** * @brief De-assert the EVENT_N pin by setting bit 2 in NVDIMM_MGT_CMD1 (0x41) * @param i_dimm The target dimm. * @return FAIL if unable to read/write register, else SUCCESS @@ -698,7 +1292,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm ) // Read the NVDIMM_MGT_CMD1 register (0x41) 7:0 errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE, - DEVICE_NVDIMM_ADDRESS(NVDIMM_MGT_CMD1) ); + DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::NVDIMM_MGT_CMD1) ); if ( errl ) { PRDF_ERR( PRDF_FUNC "Failed to read NVDIMM_MGT_CMD1. " @@ -713,7 +1307,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm ) // Write the updated data back to NVDIMM_MGT_CMD1 errl = deviceWrite( i_dimm, &data, NVDIMM_SIZE, - DEVICE_NVDIMM_ADDRESS(NVDIMM_MGT_CMD1) ); + DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::NVDIMM_MGT_CMD1) ); if ( errl ) { PRDF_ERR( PRDF_FUNC "Failed to write NVDIMM_MGT_CMD1. " @@ -732,6 +1326,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm ) } #endif // HOSTBOOT_RUNTIME +#endif // CONFIG_NVDIMM /** * @brief MCACALFIR[8] - Error from NVDIMM health status registers @@ -744,13 +1339,28 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip, { #define PRDF_FUNC "[nimbus_mca::AnalyzeNvdimmHealthStatRegs] " + #ifdef CONFIG_NVDIMM #ifdef __HOSTBOOT_RUNTIME uint32_t l_rc = SUCCESS; + bool errFound = false; // We need to check both dimms for errors for ( auto & dimm : getConnected(i_chip->getTrgt(), TYPE_DIMM) ) { + // Skip any non-NVDIMMs + if ( !isNVDIMM(dimm) ) continue; + + // Add SMART-specific, page 4 registers to FFDC + errlHndl_t mainErrl = nullptr; + mainErrl = ServiceGeneratorClass::ThisServiceGenerator().getErrl(); + if ( nullptr == mainErrl ) + { + PRDF_ERR( PRDF_FUNC "Failed to get the global error log." 
); + continue; + } + PlatServices::nvdimmAddFfdc( dimm, mainErrl ); + // De-assert the EVENT_N pin by setting bit 2 in NVDIMM_MGT_CMD1 l_rc = __deassertEventN( dimm ); if ( SUCCESS != l_rc ) continue; @@ -762,7 +1372,7 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip, // Read the Module Health Register (0xA0) 7:0 errlHndl_t errl = deviceRead( dimm, &data, NVDIMM_SIZE, - DEVICE_NVDIMM_ADDRESS(MODULE_HEALTH) ); + DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::MODULE_HEALTH) ); if ( errl ) { PRDF_ERR( PRDF_FUNC "Failed to read Module Health Register. " @@ -775,6 +1385,30 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip, // BIT 0: Persistency Lost if ( bitList.count(0) ) { + // Analyze Health Status0 Reg, Health Status1 Reg, + // and Error Theshold Status Reg + l_rc = __analyzeHealthStatus0Reg( io_sc, dimm, errFound ); + if ( SUCCESS != l_rc ) continue; + l_rc = __analyzeHealthStatus1Reg( io_sc, dimm, errFound ); + if ( SUCCESS != l_rc ) continue; + bool esTempErr = false; + l_rc = __analyzeErrorThrStatusReg(io_sc, dimm, errFound, esTempErr); + if ( SUCCESS != l_rc ) continue; + + // If we hit an ES temperature error and have not yet hit threshold, + // then keep the log hidden. + if ( esTempErr && !io_sc.service_data->IsAtThreshold() ) continue; + + // If we didn't find any error, then keep the log hidden. + if ( !errFound ) + { + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_FirEvntGone ); + // Callout NVDIMM + io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD ); + continue; + } + // EVENT_N cannot be retriggered on a new PERSISTENCY_LOST_ERROR // if a previous PERSISTENCY_LOST_ERROR still exists. Meaning, we // cannot detect/report multiple errors that happen at different @@ -782,43 +1416,77 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip, // and make the log predictive. 
io_sc.service_data->SetThresholdMaskId(0); - // Send persistency lost message to PHYP - l_rc = PlatServices::nvdimmNotifyPhypProtChange( dimm, - NVDIMM::UNPROTECTED_BECAUSE_ERROR ); + // Send message to PHYP that save/restore may work + l_rc = PlatServices::nvdimmNotifyProtChange( dimm, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); if ( SUCCESS != l_rc ) continue; - // Analyze Health Status0 Reg, Health Status1 Reg, - // and Error Theshold Status Reg - l_rc = __analyzeHealthStatus0Reg( io_sc, dimm ); - if ( SUCCESS != l_rc ) continue; - l_rc = __analyzeHealthStatus1Reg( io_sc, dimm ); - if ( SUCCESS != l_rc ) continue; - l_rc = __analyzeErrorThrStatusReg( io_sc, dimm ); + } + // BIT 1: Warning Threshold Exceeded + else if ( bitList.count(1) ) + { + l_rc = __analyzeWarningThrStatusReg( io_sc, dimm, errFound ); if ( SUCCESS != l_rc ) continue; + + if ( !errFound ) + { + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_FirEvntGone ); + // Callout NVDIMM + io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD ); + continue; + } } - // BIT 1: Warning Threshold Exceeded -- ignore // BIT 2: Persistency Restored - if ( bitList.count(2) ) + else if ( bitList.count(2) ) { // It would be rare to have an intermittent error that comes and // goes so fast we only see PERSISTENCY_RESTORED and not // PERSISTENCY_LOST_ERROR. Set predictive on threshold of 32 // per day (rule code handles the thresholding), else just keep // as a hidden log. - io_sc.service_data->AddSignatureList( dimm, PRDFSIG_NvdimmPersRes ); + __addSignature( io_sc, i_chip->getTrgt(), errFound, + PRDFSIG_NvdimmPersRes ); + + // Callout NVDIMM + io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD ); + } + // BIT 3: Below Warning Threshold + else if ( bitList.count(3) ) + { + // Much like the persistency restored bit above, we don't expect + // to see this, so just make a hidden log. 
+ __addSignature( io_sc, i_chip->getTrgt(), errFound, + PRDFSIG_BelowWarnTh ); + + // Callout NVDIMM + io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD ); + } + // BIT 4: Hardware Failure -- ignore - no logic feeding this + // BIT 5: EVENT_N_LOW -- ignore + // BIT 6:7: Unused + + // If we reach a threshold on MCACALFIR[8] of 32 per day, we assume + // some intermittent error must be triggering the FIR that isn't a + // persistency lost error which would cause us to mask. The rule code + // handles the actual thresholding here. + if ( io_sc.service_data->IsAtThreshold() && !errFound ) + { + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_IntNvdimmErr ); // callout NVDIMM high, cable high, BPM high, no gard io_sc.service_data->SetCallout( dimm, MRU_HIGH, NO_GARD ); l_rc = __addBpmCallout( dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != l_rc ) continue; - l_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH ); + l_rc = __addNvdimmCableCallout( dimm, HWAS::SRCI_PRIORITY_HIGH ); if ( SUCCESS != l_rc ) continue; - } - // BIT 3: Below Warning Threshold -- ignore - // BIT 4: Hardware Failure -- ignore - // BIT 5: EVENT_N_LOW -- ignore - // BIT 6:7: Unused + // Send message to PHYP that save/restore may work + l_rc = PlatServices::nvdimmNotifyProtChange( dimm, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) continue; + } } #else // IPL only @@ -826,7 +1494,14 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip, PRDF_ERR( PRDF_FUNC "Unexpected call to analyze NVDIMMs at IPL." ); io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_HIGH, NO_GARD ); - #endif + #endif // end runtime vs IPL check + + #else // CONFIG_NVDIMM not defined + + PRDF_ERR( PRDF_FUNC "CONFIG_NVDIMM not defined." 
); + io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_HIGH, NO_GARD ); + + #endif // end CONFIG_NVDIMM check return SUCCESS; // nothing to return to rule code diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C b/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C index 4a4391c0c..0e11b1a86 100644 --- a/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C +++ b/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -301,9 +301,9 @@ int32_t commandAddrTimeout( ExtensibleChip * i_chip, // was executed. Restarting the command will likely fail with the same // issue. Callout and gard all MCAs in which the command was executed. - std::vector<ExtensibleChip *> mcaList; + ExtensibleChipList mcaList; - if ( SUCCESS != getMcbistMaintPort(i_chip, mcaList) ) + if ( SUCCESS != getMcbistMaintPort<TYPE_MCBIST>(i_chip, mcaList) ) { PRDF_ERR( PRDF_FUNC "getMcbistMaintPort(0x%08x) failed", i_chip->getHuid() ); diff --git a/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H b/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H index 4a284253a..44ef77ec7 100644 --- a/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H +++ b/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -36,6 +36,7 @@ // Platform includes #include <prdfMemTdCtlr.H> #include <prdfPlatServices.H> +#include <prdfThresholdUtils.H> namespace PRDF { @@ -81,6 +82,24 @@ class McbistDataBundle : public DataBundle /** The Targeted Diagnostics controller. 
*/ MemTdCtlr<TARGETING::TYPE_MCBIST> * iv_tdCtlr = nullptr; + + public: // instance variables + #ifdef __HOSTBOOT_RUNTIME + + // These are used to limit the number of times a scrub command will stop + // on a UE or CE on a rank. This is to prevent potential flooding of + // maintenance UEs or CEs. The threshold will be 16 per rank for each. + TimeBasedThreshold iv_ueStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS ); + TimeBasedThreshold iv_ceStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS ); + + // If we stop on a UE or a CE, we will need to store the rank that the + // error is on so that we can clear our respective thresholds if the + // next error we stop on is on a different rank. + MemRank iv_ceUeRank; + + #endif }; /** diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C index ef3a143eb..fc389000a 100644 --- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C +++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C @@ -99,7 +99,7 @@ void commitErrl( errlHndl_t i_errl, TargetHandle_t i_trgt ) template<TARGETING::TYPE T> void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt, - TargetHandle_t i_dimmTrgt ) + TargetHandle_t i_dimmTrgt, bool i_nvdimmNoGard = false ) { #define PRDF_FUNC "[RDR::__calloutDimm] " @@ -109,9 +109,31 @@ void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt, PRDF_ASSERT( nullptr != i_dimmTrgt ); PRDF_ASSERT( TYPE_DIMM == getTargetType(i_dimmTrgt) ); - // Callout the DIMM. + HWAS::DeconfigEnum deconfigPolicy = HWAS::DELAYED_DECONFIG; + HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_Predictive; + + #ifdef CONFIG_NVDIMM + // For the "RDR: All repairs used" case, If the DIMM is an NVDIMM, change + // the gard and deconfig options to no gard/deconfig and call + // nvdimmNotifyProtChange to indicate a save/restore may work. 
+ if ( i_nvdimmNoGard ) + { + deconfigPolicy = HWAS::NO_DECONFIG; + gardPolicy = HWAS::GARD_NULL; + + uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( i_dimmTrgt, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) + { + PRDF_TRAC( PRDF_FUNC "nvdimmNotifyProtChange(0x%08x) " + "failed.", PlatServices::getHuid(i_dimmTrgt) ); + } + } + #endif + io_errl->addHwCallout( i_dimmTrgt, HWAS::SRCI_PRIORITY_HIGH, - HWAS::DELAYED_DECONFIG, HWAS::GARD_Predictive ); + deconfigPolicy, gardPolicy ); + // Clear the VPD on this DIMM. The DIMM has been garded, but it is possible // the customer will want to ungard the DIMM. Without clearing the VPD, the @@ -120,16 +142,20 @@ void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt, // customer takes the risk of ungarding the DIMM (that they should replace), // the repairs will need to be rediscovered. - std::vector<MemRank> ranks; - getMasterRanks<T>( i_portTrgt, ranks, getDimmSlct(i_dimmTrgt) ); - - for ( auto & rank : ranks ) + // Do not clear the VPD if we had an NVDIMM that we avoided garding. 
+ if ( !i_nvdimmNoGard ) { - if ( SUCCESS != clearBadDqBitmap(i_portTrgt, rank) ) + std::vector<MemRank> ranks; + getMasterRanks<T>( i_portTrgt, ranks, getDimmSlct(i_dimmTrgt) ); + + for ( auto & rank : ranks ) { - PRDF_ERR( PRDF_FUNC "clearBadDqBitmap(0x%08x,0x%02x) failed", - getHuid(i_portTrgt), rank.getKey() ); - continue; + if ( SUCCESS != clearBadDqBitmap(i_portTrgt, rank) ) + { + PRDF_ERR( PRDF_FUNC "clearBadDqBitmap(0x%08x,0x%02x) failed", + getHuid(i_portTrgt), rank.getKey() ); + continue; + } } } @@ -156,11 +182,7 @@ void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_trgt, //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -bool processRepairedRanks( TargetHandle_t i_trgt, uint8_t i_repairedRankMask ); - -template<> -bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, - uint8_t i_repairedRankMask ) +bool processRepairedRanks( TargetHandle_t i_trgt, uint8_t i_repairedRankMask ) { #define PRDF_FUNC "[processRepairedRanks] " @@ -179,7 +201,7 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, // map value has no significance. 
std::map<TargetHandle_t, uint32_t> calloutList; - ExtensibleChip * mcaChip = (ExtensibleChip *)systemPtr->GetChip(i_trgt); + ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip(i_trgt); for ( uint8_t r = 0; r < MASTER_RANKS_PER_PORT; ++r ) { @@ -191,20 +213,18 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, MemRank rank ( r ); MemMark cm; - if ( SUCCESS != MarkStore::readChipMark<TYPE_MCA>( mcaChip, rank, - cm ) ) + if ( SUCCESS != MarkStore::readChipMark<T>( chip, rank, cm ) ) { - PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_MCA>(0x%08x,0x%02x) " - "failed", mcaChip->getHuid(), rank.getKey() ); + PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x,0x%02x) " + "failed", chip->getHuid(), rank.getKey() ); continue; // skip this rank } MemMark sm; - if ( SUCCESS != MarkStore::readSymbolMark<TYPE_MCA>( mcaChip, rank, - sm ) ) + if ( SUCCESS != MarkStore::readSymbolMark<T>( chip, rank, sm ) ) { - PRDF_ERR( PRDF_FUNC "readSymbolMark<TYPE_MCA>(0x%08x,0x%02x) " - "failed", mcaChip->getHuid(), rank.getKey() ); + PRDF_ERR( PRDF_FUNC "readSymbolMark<T>(0x%08x,0x%02x) " + "failed", chip->getHuid(), rank.getKey() ); continue; // skip this rank } @@ -214,9 +234,8 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, if ( NULL == errl ) { - errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE, - i_trgt, - PRDFSIG_RdrRepairsUsed ); + errl = createErrl<T>( PRDF_DETECTED_FAIL_HARDWARE, + i_trgt, PRDFSIG_RdrRepairsUsed ); } std::vector<MemSymbol> symList; @@ -246,16 +265,21 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, // Callout all DIMMs in the map. for ( auto const & dimm : calloutList ) { - __calloutDimm<TYPE_MCA>( errl, i_trgt, dimm.first ); + bool nvdimmNoGard = false; + #ifdef CONFIG_NVDIMM + if ( isNVDIMM(dimm.first) ) nvdimmNoGard = true; + #endif + + __calloutDimm<T>( errl, i_trgt, dimm.first, nvdimmNoGard ); } // Commit the error log, if needed. 
- commitErrl<TYPE_MCA>( errl, i_trgt ); + commitErrl<T>( errl, i_trgt ); // Commit an additional error log indicating something failed in the // analysis, if needed. - commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, analysisErrors ); + commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); }while(0); return o_calloutMade; @@ -263,6 +287,14 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, #undef PRDF_FUNC } + +template +bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, + uint8_t i_repairedRankMask ); +template +bool processRepairedRanks<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + uint8_t i_repairedRankMask ); + //------------------------------------------------------------------------------ template<> @@ -368,7 +400,12 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, // Callout all DIMMs in the map. for ( auto const & dimm : calloutList ) { - __calloutDimm<TYPE_MBA>( errl, i_trgt, dimm.first ); + bool nvdimmNoGard = false; + #ifdef CONFIG_NVDIMM + if ( isNVDIMM(dimm.first) ) nvdimmNoGard = true; + #endif + + __calloutDimm<TYPE_MBA>(errl, i_trgt, dimm.first, nvdimmNoGard); } o_calloutMade = true; @@ -392,10 +429,7 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, template<TARGETING::TYPE T> -bool processBadDimms( TargetHandle_t i_trgt, uint8_t i_badDimmMask ); - -template<> -bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) +bool processBadDimms( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) { #define PRDF_FUNC "[processBadDimms] " @@ -421,29 +455,35 @@ bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) { if ( NULL == errl ) { - errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE, - i_trgt, PRDFSIG_RdrRepairUnavail ); + errl = createErrl<T>( PRDF_DETECTED_FAIL_HARDWARE, + i_trgt, PRDFSIG_RdrRepairUnavail ); } - __calloutDimm<TYPE_MCA>( errl, i_trgt, dimm ); + 
__calloutDimm<T>( errl, i_trgt, dimm ); o_calloutMade = true; } } // Commit the error log, if needed. - commitErrl<TYPE_MCA>( errl, i_trgt ); + commitErrl<T>( errl, i_trgt ); // Commit an additional error log indicating something failed in the // analysis, if needed. - commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, analysisErrors ); + commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); return o_calloutMade; #undef PRDF_FUNC } +template +bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ); +template +bool processBadDimms<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + uint8_t i_badDimmMask ); + //------------------------------------------------------------------------------ template<> @@ -580,6 +620,25 @@ void deployDramSpares<TYPE_MBA>( TargetHandle_t i_trgt, } } +template<> +void deployDramSpares<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + const std::vector<MemRank> & i_ranks ) +{ + for ( auto & rank : i_ranks ) + { + MemSymbol sym = MemSymbol::fromSymbol( i_trgt, rank, 71 ); + + int32_t l_rc = mssSetSteerMux<TYPE_OCMB_CHIP>(i_trgt, rank, sym, false); + if ( SUCCESS != l_rc ) + { + // mssSetSteerMux() will print a trace and commit the error log, + // however, we need to handle the return code or we get a compile + // warning in Hostboot. 
+ continue; + } + } +} + } // end namespace RDR //------------------------------------------------------------------------------ @@ -680,6 +739,8 @@ template uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt ); template uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ); +template +uint32_t restoreDramRepairs<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt ); //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C index 8c17c2fd9..0ad247134 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices.C @@ -40,6 +40,8 @@ #include <prdfRegisterCache.H> #include <prdfCenMbaDataBundle.H> +#include <prdfP9McbistDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemScrubUtils.H> #include <iipServiceDataCollector.h> @@ -50,7 +52,7 @@ #include <time.h> #include <initservice/initserviceif.H> #include <devicefw/userif.H> -#include <iipMopRegisterAccess.h> +#include <prdfHomRegisterAccess.H> #include <ibscomreasoncodes.H> #include <scom/scomreasoncodes.H> #include <p9_proc_gettracearray.H> @@ -58,6 +60,13 @@ #include <p9c_mss_maint_cmds.H> #include <prdfParserUtils.H> #include <p9c_mss_rowRepairFuncs.H> +#include <errl/errludlogregister.H> + +#include <hwp_wrappers.H> + +#ifdef CONFIG_NVDIMM +#include <nvdimm.H> +#endif using namespace TARGETING; @@ -387,31 +396,31 @@ uint32_t getMemAddrRange<TYPE_MCA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint32_t getMemAddrRange<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - mss::mcbist::address & o_startAddr, - mss::mcbist::address & o_endAddr, - AddrRangeType i_rangeType ) +uint32_t getMemAddrRange<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + mss::mcbist::address & o_startAddr, + mss::mcbist::address & o_endAddr, + AddrRangeType 
i_rangeType ) { - #define PRDF_FUNC "[PlatServices::getMemAddrRange<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[PlatServices::getMemAddrRange<TYPE_OCMB_CHIP>] " - PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + #ifdef CONFIG_AXONE - /* TODO RTC 207273 - no HWP support yet - uint32_t port = i_chip->getPos() % MAX_PORT_PER_OCMB; + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + // TODO RTC 210072 - support for multiple ports if ( SLAVE_RANK == i_rangeType ) { FAPI_CALL_HWP_NORETURN( mss::mcbist::address::get_srank_range, - port, i_rank.getDimmSlct(), + 0, i_rank.getDimmSlct(), i_rank.getRankSlct(), i_rank.getSlave(), o_startAddr, o_endAddr ); } else if ( MASTER_RANK == i_rangeType ) { FAPI_CALL_HWP_NORETURN( mss::mcbist::address::get_mrank_range, - port, i_rank.getDimmSlct(), + 0, i_rank.getDimmSlct(), i_rank.getRankSlct(), o_startAddr, o_endAddr ); } else @@ -419,7 +428,8 @@ uint32_t getMemAddrRange<TYPE_MEM_PORT>( ExtensibleChip * i_chip, PRDF_ERR( PRDF_FUNC "unsupported range type %d", i_rangeType ); PRDF_ASSERT(false); } - */ + + #endif return SUCCESS; @@ -520,15 +530,15 @@ uint32_t getMemAddrRange<TYPE_MCA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint32_t getMemAddrRange<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MemAddr & o_startAddr, - MemAddr & o_endAddr, - AddrRangeType i_rangeType ) +uint32_t getMemAddrRange<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemAddr & o_startAddr, + MemAddr & o_endAddr, + AddrRangeType i_rangeType ) { mss::mcbist::address saddr, eaddr; - uint32_t o_rc = getMemAddrRange<TYPE_MEM_PORT>( i_chip, i_rank, saddr, - eaddr, i_rangeType ); + uint32_t o_rc = getMemAddrRange<TYPE_OCMB_CHIP>( i_chip, i_rank, saddr, + eaddr, i_rangeType ); if ( SUCCESS == o_rc ) { o_startAddr = __convertMssMcbistAddr( saddr ); @@ -630,16 +640,16 @@ 
uint32_t getMemAddrRange<TYPE_MCA>( ExtensibleChip * i_chip, uint8_t i_dimmSlct ); template -uint32_t getMemAddrRange<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - mss::mcbist::address & o_startAddr, - mss::mcbist::address & o_endAddr, - uint8_t i_dimmSlct ); +uint32_t getMemAddrRange<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + mss::mcbist::address & o_startAddr, + mss::mcbist::address & o_endAddr, + uint8_t i_dimmSlct ); template -uint32_t getMemAddrRange<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - MemAddr & o_startAddr, - MemAddr & o_endAddr, - uint8_t i_dimmSlct ); +uint32_t getMemAddrRange<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + MemAddr & o_startAddr, + MemAddr & o_endAddr, + uint8_t i_dimmSlct ); //------------------------------------------------------------------------------ @@ -696,17 +706,16 @@ bool isRowRepairEnabled<TYPE_MCA>( ExtensibleChip * i_chip, } template<> -bool isRowRepairEnabled<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +bool isRowRepairEnabled<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ) { - #define PRDF_FUNC "[PlatServices::isRowRepairEnabled<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[PlatServices::isRowRepairEnabled<TYPE_OCMB_CHIP>] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); bool o_isEnabled = false; - /* TODO RTC 207273 - no HWP support yet do { // Don't do row repair if DRAM repairs is disabled. 
@@ -732,13 +741,110 @@ bool isRowRepairEnabled<TYPE_MEM_PORT>( ExtensibleChip * i_chip, } }while(0); - */ return o_isEnabled; #undef PRDF_FUNC } +//------------------------------------------------------------------------------ + +#ifdef CONFIG_NVDIMM +uint32_t nvdimmNotifyProtChange( TARGETING::TargetHandle_t i_target, + const NVDIMM::nvdimm_protection_t i_state ) +{ + #define PRDF_FUNC "[PlatServices::nvdimmNotifyProtChange] " + + uint32_t o_rc = SUCCESS; + + errlHndl_t errl = NVDIMM::notifyNvdimmProtectionChange( i_target, i_state ); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "NVDIMM::notifyNvdimmProtectionChange(0x%08x) " + "failed.", getHuid(i_target) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + } + + return o_rc; + + #undef PRDF_FUNC + +} + +void nvdimmAddFfdc( TARGETING::TargetHandle_t i_nvdimm, errlHndl_t & io_errl ) +{ + #define PRDF_FUNC "[PlatServices::nvdimmAddFfdc] " + // Add Page 4 Regs and Vendor Log using external Hostboot interfaces. + NVDIMM::nvdimmAddPage4Regs( i_nvdimm, io_errl ); + NVDIMM::nvdimmAddVendorLog( i_nvdimm, io_errl ); + + // Add PRD specific registers relevant to runtime NVDIMM analysis. 
+ const uint16_t regList[] = + { + // Module health registers + NVDIMM::i2cReg::MODULE_HEALTH, + NVDIMM::i2cReg::MODULE_HEALTH_STATUS0, + NVDIMM::i2cReg::MODULE_HEALTH_STATUS1, + + // Threshold status registers + NVDIMM::i2cReg::ERROR_THRESHOLD_STATUS, + NVDIMM::i2cReg::WARNING_THRESHOLD_STATUS, + + // ES_TEMP registers + NVDIMM::i2cReg::ES_TEMP0, + NVDIMM::i2cReg::ES_TEMP1, + NVDIMM::i2cReg::ES_TEMP_WARNING_HIGH_THRESHOLD0, + NVDIMM::i2cReg::ES_TEMP_WARNING_HIGH_THRESHOLD1, + NVDIMM::i2cReg::ES_TEMP_WARNING_LOW_THRESHOLD0, + NVDIMM::i2cReg::ES_TEMP_WARNING_LOW_THRESHOLD1, + + // NVM Lifetime registers + NVDIMM::i2cReg::NVM_LIFETIME, + NVDIMM::i2cReg::NVM_LIFETIME_ERROR_THRESHOLD, + NVDIMM::i2cReg::NVM_LIFETIME_WARNING_THRESHOLD, + + // ES Lifetime registers + NVDIMM::i2cReg::ES_LIFETIME, + NVDIMM::i2cReg::ES_LIFETIME_ERROR_THRESHOLD, + NVDIMM::i2cReg::ES_LIFETIME_WARNING_THRESHOLD, + + // Status registers + NVDIMM::i2cReg::ERASE_STATUS, + NVDIMM::i2cReg::ARM_STATUS, + NVDIMM::i2cReg::SET_EVENT_NOTIFICATION_STATUS, + }; + + ERRORLOG::ErrlUserDetailsLogRegister regUd( i_nvdimm ); + for ( auto const & reg : regList ) + { + // NVDIMM register size = 1 byte + size_t NVDIMM_SIZE = 1; + + uint8_t data = 0; + errlHndl_t errl = deviceRead( i_nvdimm, &data, NVDIMM_SIZE, + DEVICE_NVDIMM_ADDRESS(reg) ); + if ( errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to read register 0x%X on " + "NVDIMM HUID: 0x%08x", reg, getHuid(i_nvdimm) ); + // Don't commit, just delete the error and continue + delete errl; errl = nullptr; + continue; + } + // Only add registers that have non-zero data. 
+ if ( 0 == data ) continue; + + regUd.addDataBuffer( &data, sizeof(data), DEVICE_NVDIMM_ADDRESS(reg) ); + } + + regUd.addToLog( io_errl ); + + #undef PRDF_FUNC +} + +#endif + //############################################################################## //## Nimbus Maintenance Command wrappers //############################################################################## @@ -758,10 +864,16 @@ uint32_t startBgScrub<TYPE_MCA>( ExtensibleChip * i_mcaChip, ExtensibleChip * mcbChip = getConnectedParent( i_mcaChip, TYPE_MCBIST ); fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiTrgt ( mcbChip->getTrgt() ); + #ifdef __HOSTBOOT_RUNTIME + // Starting a new command. Clear the UE and CE scrub stop counters + getMcbistDataBundle( mcbChip )->iv_ueStopCounter.reset(); + getMcbistDataBundle( mcbChip )->iv_ceStopCounter.reset(); + #endif + // Get the stop conditions. // NOTE: If HBRT_PRD is not configured, we want to use the defaults so that // background scrubbing never stops. - mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<> stopCond; // AUEs are checkstop attentions. Unfortunately, MCBIST commands do not stop // when the system checkstops. 
Therefore, we must set the stop condition for @@ -851,11 +963,11 @@ uint32_t startBgScrub<TYPE_MCBIST>( ExtensibleChip * i_mcaChip, //------------------------------------------------------------------------------ +#ifndef CONFIG_AXONE template<> uint32_t startTdScrub<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - AddrRangeType i_rangeType, - mss::mcbist::stop_conditions i_stopCond ) + const MemRank & i_rank, AddrRangeType i_rangeType, + mss::mcbist::stop_conditions<mss::mc_type::NIMBUS> i_stopCond ) { #define PRDF_FUNC "[PlatServices::startTdScrub<TYPE_MCA>] " @@ -912,6 +1024,7 @@ uint32_t startTdScrub<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +#endif //############################################################################## //## Centaur Maintenance Command wrappers @@ -1316,25 +1429,31 @@ uint32_t incMaintAddr<TYPE_MBA>( ExtensibleChip * i_chip, //############################################################################## template<> -uint32_t startBgScrub<TYPE_MEM_PORT>( ExtensibleChip * i_memPort, - const MemRank & i_rank ) +uint32_t startBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmb, + const MemRank & i_rank ) { - #define PRDF_FUNC "[PlatServices::startBgScrub<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[PlatServices::startBgScrub<TYPE_OCMB_CHIP>] " - PRDF_ASSERT( nullptr != i_memPort ); - PRDF_ASSERT( TYPE_MEM_PORT == i_memPort->getType() ); + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); uint32_t o_rc = SUCCESS; - /* TODO RTC 207273 - no HWP support yet + #ifdef CONFIG_AXONE + // Get the OCMB fapi target - ExtensibleChip * ocmbChip = getConnectedParent( i_memPort, TYPE_OCMB_CHIP ); - fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt (ocmbChip->getTrgt()); + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt (i_ocmb->getTrgt()); + + #ifdef __HOSTBOOT_RUNTIME + // Starting a new command. 
Clear the UE and CE scrub stop counters + getOcmbDataBundle( i_ocmb )->iv_ueStopCounter.reset(); + getOcmbDataBundle( i_ocmb )->iv_ceStopCounter.reset(); + #endif // Get the stop conditions. // NOTE: If HBRT_PRD is not configured, we want to use the defaults so that // background scrubbing never stops. - mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; // AUEs are checkstop attentions. Unfortunately, MCBIST commands do not stop // when the system checkstops. Therefore, we must set the stop condition for @@ -1373,40 +1492,40 @@ uint32_t startBgScrub<TYPE_MEM_PORT>( ExtensibleChip * i_memPort, { // Get the first address of the given rank. mss::mcbist::address saddr, eaddr; - o_rc = getMemAddrRange<TYPE_MEM_PORT>( i_memPort, i_rank, saddr, eaddr, - SLAVE_RANK ); + o_rc = getMemAddrRange<TYPE_OCMB_CHIP>( i_ocmb, i_rank, saddr, eaddr, + SLAVE_RANK ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", - i_memPort->getHuid(), i_rank.getKey() ); + i_ocmb->getHuid(), i_rank.getKey() ); break; } // Clear all of the counters and maintenance ECC attentions. - o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( ocmbChip ); + o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( i_ocmb ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", - ocmbChip->getHuid() ); + i_ocmb->getHuid() ); break; } // Start the background scrub command. 
errlHndl_t errl = nullptr; - FAPI_INVOKE_HWP( errl, mss::memdiags::background_scrub, fapiTrgt, + FAPI_INVOKE_HWP( errl, exp_background_scrub, fapiTrgt, stopCond, scrubSpeed, saddr ); if ( nullptr != errl ) { - PRDF_ERR( PRDF_FUNC "mss::memdiags::background_scrub(0x%08x,%d) " - "failed", ocmbChip->getHuid(), i_rank.getMaster() ); + PRDF_ERR( PRDF_FUNC "exp_background_scrub(0x%08x,%d) " + "failed", i_ocmb->getHuid(), i_rank.getMaster() ); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; break; } } while (0); + #endif - */ return o_rc; #undef PRDF_FUNC @@ -1414,31 +1533,19 @@ uint32_t startBgScrub<TYPE_MEM_PORT>( ExtensibleChip * i_memPort, //------------------------------------------------------------------------------ -// This specialization only exists to avoid a lot of extra code in some classes. -// The input chip must still be a MEM_PORT. -template<> -uint32_t startBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_memPort, - const MemRank & i_rank ) -{ - return startBgScrub<TYPE_MEM_PORT>( i_memPort, i_rank ); -} - -//------------------------------------------------------------------------------ - +#ifdef CONFIG_AXONE template<> -uint32_t startTdScrub<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - AddrRangeType i_rangeType, - mss::mcbist::stop_conditions i_stopCond ) +uint32_t startTdScrub<TYPE_OCMB_CHIP>(ExtensibleChip * i_chip, + const MemRank & i_rank, AddrRangeType i_rangeType, + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> i_stopCond) { - #define PRDF_FUNC "[PlatServices::startTdScrub<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[PlatServices::startTdScrub<TYPE_OCMB_CHIP>] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); uint32_t o_rc = SUCCESS; - /* TODO RTC 207273 - no HWP support yet // Set stop-on-AUE for all target scrubs. See explanation in startBgScrub() // for the reasons why. 
i_stopCond.set_pause_on_aue(mss::ON); @@ -1447,8 +1554,8 @@ uint32_t startTdScrub<TYPE_MEM_PORT>( ExtensibleChip * i_chip, { // Get the address range of the given rank. mss::mcbist::address saddr, eaddr; - o_rc = getMemAddrRange<TYPE_MEM_PORT>( i_chip, i_rank, saddr, eaddr, - i_rangeType ); + o_rc = getMemAddrRange<TYPE_OCMB_CHIP>( i_chip, i_rank, saddr, eaddr, + i_rangeType ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", @@ -1457,12 +1564,10 @@ uint32_t startTdScrub<TYPE_MEM_PORT>( ExtensibleChip * i_chip, } // Get the OCMB_CHIP fapi target. - ExtensibleChip * ocmbChip = getConnectedParent(i_chip, TYPE_OCMB_CHIP); - fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> - fapiTrgt(ocmbChip->getTrgt()); + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt(i_chip->getTrgt()); // Clear all of the counters and maintenance ECC attentions. - o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( ocmbChip ); + o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( i_chip ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", @@ -1472,23 +1577,23 @@ uint32_t startTdScrub<TYPE_MEM_PORT>( ExtensibleChip * i_chip, // Start targeted scrub command. 
errlHndl_t errl = nullptr; - FAPI_INVOKE_HWP( errl, mss::memdiags::targeted_scrub, fapiTrgt, + FAPI_INVOKE_HWP( errl, exp_targeted_scrub, fapiTrgt, i_stopCond, saddr, eaddr, mss::mcbist::NONE ); if ( nullptr != errl ) { - PRDF_ERR( PRDF_FUNC "mss::memdiags::targeted_scrub(0x%08x,0x%02x) " - "failed", ocmbChip->getHuid(), i_rank.getKey() ); + PRDF_ERR( PRDF_FUNC "exp_targeted_scrub(0x%08x,0x%02x) " + "failed", i_chip->getHuid(), i_rank.getKey() ); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; break; } } while (0); - */ return o_rc; #undef PRDF_FUNC } +#endif //############################################################################## //## Core/cache trace array functions diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.H b/src/usr/diag/prdf/plat/prdfPlatServices.H index e1710119c..b99c20bed 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices.H @@ -53,6 +53,10 @@ #include <prdfBitString.H> #include <mem/prdfMemRank.H> +#ifdef CONFIG_NVDIMM +#include <isteps/nvdimm/nvdimm.H> +#endif + //------------------------------------------------------------------------------ namespace PRDF @@ -169,6 +173,26 @@ uint32_t getMemAddrRange( ExtensibleChip * i_chip, template<TARGETING::TYPE T> bool isRowRepairEnabled( ExtensibleChip * i_chip, const MemRank & i_rank ); +#ifdef CONFIG_NVDIMM +/** + * @brief Notify PHYP/Hostboot of NVDIMM protection status + * + * @param i_target Processor with NVDIMM + * @param i_state Protection state of NVDIMM + */ +uint32_t nvdimmNotifyProtChange( TARGETING::TargetHandle_t i_target, + const NVDIMM::nvdimm_protection_t i_state ); + +/** + * @brief Add SMART-specific, page 4 NVDIMM registers to the FFDC + * + * @param i_nvdimm An nvdimm target + * @param io_errl Error log to add the FFDC to + */ +void nvdimmAddFfdc( TARGETING::TargetHandle_t i_nvdimm, errlHndl_t & io_errl ); + +#endif + //############################################################################## //## 
Nimbus/Centaur Maintenance Command wrappers //############################################################################## diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C index 21cea0c85..14d1c26ba 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C @@ -43,7 +43,8 @@ #include <prdfMfgThresholdMgr.H> #include <diag/mdia/mdia.H> -#include <config.h> + +#include <hwp_wrappers.H> using namespace TARGETING; @@ -211,19 +212,19 @@ uint32_t mssRestoreDramRepairs<TYPE_MBA>( TargetHandle_t i_target, //------------------------------------------------------------------------------ template<> -uint32_t mssRestoreDramRepairs<TYPE_MEM_PORT>( TargetHandle_t i_target, - uint8_t & o_repairedRankMask, - uint8_t & o_badDimmMask ) +uint32_t mssRestoreDramRepairs<TYPE_OCMB_CHIP>( TargetHandle_t i_target, + uint8_t & o_repairedRankMask, + uint8_t & o_badDimmMask ) { uint32_t o_rc = SUCCESS; - /* TODO RTC 207273 - no HWP support yet + /* TODO RTC 199032 - no HWP support yet errlHndl_t errl = NULL; fapi2::buffer<uint8_t> tmpRepairedRankMask, tmpBadDimmMask; FAPI_INVOKE_HWP( errl, mss::restore_repairs, - fapi2::Target<fapi2::TARGET_TYPE_MEM_PORT>( i_target ), + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP>( i_target ), tmpRepairedRankMask, tmpBadDimmMask ); if ( NULL != errl ) @@ -315,7 +316,7 @@ uint32_t startSfRead<TYPE_MCA>( ExtensibleChip * i_mcaChip, fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiTrgt ( mcbChip->getTrgt() ); // Get the stop conditions. 
- mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<> stopCond; stopCond.set_pause_on_mpe(mss::ON) .set_pause_on_ue(mss::ON) .set_pause_on_aue(mss::ON) @@ -843,41 +844,43 @@ uint32_t resumeTdSteerCleanup<TYPE_MBA>( ExtensibleChip * i_chip, template<> bool isBroadcastModeCapable<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) { - /* TODO RTC 207273 - no HWP support yet PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + mss::states l_ret = mss::states::NO; + + #ifdef CONFIG_AXONE + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt ( i_chip->getTrgt() ); + FAPI_CALL_HWP( l_ret, exp_is_broadcast_capable, fapiTrgt ); + + #endif - mss::states l_ret = mss::states::NO; - FAPI_CALL_HWP( l_ret, mss::mcbist::is_broadcast_capable, fapiTrgt ); return ( mss::states::YES == l_ret ); - */ - return false; } //------------------------------------------------------------------------------ template<> -uint32_t startSfRead<TYPE_MEM_PORT>( ExtensibleChip * i_memPort, - const MemRank & i_rank ) +uint32_t startSfRead<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmb, + const MemRank & i_rank ) { - #define PRDF_FUNC "[PlatServices::startSfRead<TYPE_MCA>] " + #define PRDF_FUNC "[PlatServices::startSfRead<TYPE_OCMB_CHIP>] " PRDF_ASSERT( isInMdiaMode() ); // MDIA must be running. - PRDF_ASSERT( nullptr != i_memPort ); - PRDF_ASSERT( TYPE_MEM_PORT == i_memPort->getType() ); + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); uint32_t o_rc = SUCCESS; - /* TODO RTC 207273 - no HWP support yet + #ifdef CONFIG_AXONE + // Get the OCMB_CHIP fapi target - ExtensibleChip * ocmbChip = getConnectedParent( i_memPort, TYPE_OCMB_CHIP ); - fapi2::Target<fapi2::TYPE_OCMB_CHIP> fapiTrgt ( ocmbChip->getTrgt() ); + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt ( i_ocmb->getTrgt() ); // Get the stop conditions. 
- mss::mcbist::stop_conditions stopCond; + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; stopCond.set_pause_on_mpe(mss::ON) .set_pause_on_ue(mss::ON) .set_pause_on_aue(mss::ON) @@ -892,39 +895,39 @@ uint32_t startSfRead<TYPE_MEM_PORT>( ExtensibleChip * i_memPort, { // Get the first address of the given rank. mss::mcbist::address saddr, eaddr; - o_rc = getMemAddrRange<TYPE_MEM_PORT>( i_memPort, i_rank, saddr, eaddr, - SLAVE_RANK ); + o_rc = getMemAddrRange<TYPE_OCMB_CHIP>( i_ocmb, i_rank, saddr, eaddr, + SLAVE_RANK ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", - i_memPort->getHuid(), i_rank.getKey() ); + i_ocmb->getHuid(), i_rank.getKey() ); break; } // Clear all of the counters and maintenance ECC attentions. - o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( ocmbChip ); + o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( i_ocmb ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", - ocmbChip->getHuid() ); + i_ocmb->getHuid() ); break; } // Start the super fast read command. errlHndl_t errl; - FAPI_INVOKE_HWP( errl, mss::memdiags::sf_read, fapiTrgt, stopCond, + FAPI_INVOKE_HWP( errl, exp_sf_read, fapiTrgt, stopCond, saddr ); if ( nullptr != errl ) { - PRDF_ERR( PRDF_FUNC "mss::memdiags::sf_read(0x%08x,%d) failed", - ocmbChip->getHuid(), i_rank.getMaster() ); + PRDF_ERR( PRDF_FUNC "exp_sf_read(0x%08x,%d) failed", + i_ocmb->getHuid(), i_rank.getMaster() ); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; break; } } while (0); - */ + #endif return o_rc; @@ -933,22 +936,154 @@ uint32_t startSfRead<TYPE_MEM_PORT>( ExtensibleChip * i_memPort, //------------------------------------------------------------------------------ -// This specialization only exists to avoid a lot of extra code in some classes. -// The input chip must still be an MEM_PORT chip. 
template<> -uint32_t startSfRead<TYPE_OCMB_CHIP>( ExtensibleChip * i_memPort, - const MemRank & i_rank ) +uint32_t cleanupSfRead<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmbChip ) { - return startSfRead<TYPE_MEM_PORT>( i_memPort, i_rank ); + return SUCCESS; // Not needed for MCBIST commands. } //------------------------------------------------------------------------------ +#ifdef CONFIG_AXONE + template<> -uint32_t cleanupSfRead<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmbChip ) +uint32_t startTdSteerCleanup<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, AddrRangeType i_rangeType, + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> i_stopCond ) { - return SUCCESS; // Not needed for MCBIST commands. + #define PRDF_FUNC "[PlatServices::startTdSteerCleanup<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( isInMdiaMode() ); // MDIA must be running. + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + // Default speed is to run as fast as possible. + //mss_MaintCmd::TimeBaseSpeed cmdSpeed = mss_MaintCmd::FAST_MAX_BW_IMPACT; + + // Set stop-on-AUE for all target scrubs. See explanation in startBgScrub() + // for the reasons why. + i_stopCond.set_pause_on_aue(mss::ON); + + do + { + // Get the address range of the given rank. + mss::mcbist::address saddr, eaddr; + o_rc = getMemAddrRange<TYPE_OCMB_CHIP>( i_chip, i_rank, saddr, eaddr, + i_rangeType ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", + i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // Clear all of the counters and maintenance ECC attentions. + o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + /* TODO RTC 199032 - sparing support + // Get the MBA fapi target. 
+ fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() ); + + // Start the steer cleanup command. + mss_TimeBaseSteerCleanup cmd { fapiTrgt, saddr, eaddr, cmdSpeed, + i_stopCond, false }; + errlHndl_t errl = nullptr; + FAPI_INVOKE_HWP( errl, cmd.setupAndExecuteCmd ); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "setupAndExecuteCmd() on 0x%08x,0x%02x failed", + i_chip->getHuid(), i_rank.getKey() ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; break; + } + */ + + } while (0); + + return o_rc; + + #undef PRDF_FUNC } + +#endif + +//------------------------------------------------------------------------------ + +#ifdef CONFIG_AXONE + +template<> +uint32_t startTdSfRead<TYPE_OCMB_CHIP>(ExtensibleChip * i_chip, + const MemRank & i_rank, AddrRangeType i_rangeType, + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> i_stopCond) +{ + #define PRDF_FUNC "[PlatServices::startTdSfRead<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( isInMdiaMode() ); // MDIA must be running. + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + // Set stop-on-AUE for all target scrubs. See explanation in startBgScrub() + // for the reasons why. + i_stopCond.set_pause_on_aue(mss::ON); + + do + { + // Get the address range of the given rank. + mss::mcbist::address saddr, eaddr; + o_rc = getMemAddrRange<TYPE_OCMB_CHIP>( i_chip, i_rank, saddr, eaddr, + i_rangeType ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", + i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // Clear all of the counters and maintenance ECC attentions. + o_rc = prepareNextCmd<TYPE_OCMB_CHIP>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + // Get the OCMB fapi target. + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> + fapiTrgt( i_chip->getTrgt() ); + + // Start the super fast read command. 
+ errlHndl_t errl; + FAPI_INVOKE_HWP( errl, exp_sf_read, fapiTrgt, i_stopCond, saddr ); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "exp_sf_read(0x%08x,%d) failed", + i_chip->getHuid(), i_rank.getMaster() ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +#endif + //------------------------------------------------------------------------------ } // end namespace PlatServices diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H index a27f1b92e..c12cf5a51 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H @@ -66,7 +66,7 @@ int32_t mdiaSendEventMsg( TARGETING::TargetHandle_t i_trgt, /** * @brief Initiates a reconfig loop due to an RCD parity error. - * @param i_trgt An MCA or MEM_PORT target. + * @param i_trgt An MCA target. * @return True if the number of allowed reconfig loops has been exceeded. * False otherwise. */ @@ -113,7 +113,7 @@ bool isBroadcastModeCapable( ExtensibleChip * i_chip ); /** * @brief Starts a super fast read command from the first address of the given * rank to the end of memory. - * @param i_chip MCBIST/MCA, MBA, or MEM_PORT chip. + * @param i_chip MCBIST/MCA, MBA, or OCMB chip. * @param i_rank Will start the command on the first address of this slave * rank. To ensure the command is started on a master rank boundary, * make sure the slave rank value is 0. 
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C index 25a470f8d..0fbe5b969 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C @@ -37,6 +37,8 @@ // Platform includes #include <prdfCenMbaDataBundle.H> +#include <prdfP9McbistDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemScrubUtils.H> #include <prdfPlatServices.H> @@ -51,6 +53,8 @@ #include <p9_stop_api.H> #include <rt_todintf.H> +#include <hwp_wrappers.H> + //------------------------------------------------------------------------------ using namespace TARGETING; @@ -105,28 +109,6 @@ void sendPredDeallocRequest( uint64_t i_saddr, uint64_t i_eaddr ) __dyndealloc( i_saddr, i_eaddr, MEMORY_ERROR_PREDICTIVE ); } -uint32_t nvdimmNotifyPhypProtChange( TARGETING::TargetHandle_t i_target, - const NVDIMM::nvdimm_protection_t i_state ) -{ - #define PRDF_FUNC "[PlatServices::nvdimmNotifyPhypProtChange] " - - uint32_t o_rc = SUCCESS; - - errlHndl_t errl = NVDIMM::notifyNvdimmProtectionChange( i_target, i_state ); - if ( nullptr != errl ) - { - PRDF_ERR( PRDF_FUNC "NVDIMM::notifyNvdimmProtectionChange(0x%08x) " - "failed.", getHuid(i_target) ); - PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); - o_rc = FAIL; - } - - return o_rc; - - #undef PRDF_FUNC - -} - //############################################################################## //## Nimbus Maintenance Command wrappers //############################################################################## @@ -172,7 +154,8 @@ uint32_t stopBgScrub<TYPE_MCA>( ExtensibleChip * i_chip ) //------------------------------------------------------------------------------ template<> -uint32_t resumeBgScrub<TYPE_MCBIST>( ExtensibleChip * i_chip ) +uint32_t resumeBgScrub<TYPE_MCBIST>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[PlatServices::resumeBgScrub<TYPE_MCBIST>] " @@ -195,9 +178,42 @@ uint32_t resumeBgScrub<TYPE_MCBIST>( 
ExtensibleChip * i_chip ) break; } + // Check UE and CE stop counters to determine stop conditions + mss::mcbist::stop_conditions<> stopCond; + if ( getMcbistDataBundle(i_chip)->iv_ueStopCounter.thReached(io_sc) ) + { + // If we've reached the limit of UEs we're allowed to stop on + // per rank, only set the stop on mpe stop condition. + stopCond.set_pause_on_mpe(mss::ON); + } + else if (getMcbistDataBundle(i_chip)->iv_ceStopCounter.thReached(io_sc)) + { + // If we've reached the limit of CEs we're allowed to stop on + // per rank, set all the normal stop conditions except stop on CE + stopCond.set_pause_on_aue(mss::ON); + + #ifdef CONFIG_HBRT_PRD + + stopCond.set_pause_on_mpe(mss::ON) + .set_pause_on_ue(mss::ON); + + // In MNFG mode, stop on RCE_ETE to get an accurate callout for IUEs + if ( mfgMode() ) stopCond.set_thresh_rce(1); + + #endif + } + else + { + // If we haven't reached threshold on the number of UEs or CEs we + // have stopped on, do not change the stop conditions. + stopCond = mss::mcbist::stop_conditions<>( + mss::mcbist::stop_conditions<>::DONT_CHANGE ); + } + // Resume the command on the next address. 
errlHndl_t errl; - FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt ); + FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt, + mss::mcbist::end_boundary::DONT_CHANGE, stopCond ); if ( nullptr != errl ) { @@ -217,12 +233,14 @@ uint32_t resumeBgScrub<TYPE_MCBIST>( ExtensibleChip * i_chip ) //------------------------------------------------------------------------------ template<> -uint32_t resumeBgScrub<TYPE_MCA>( ExtensibleChip * i_chip ) +uint32_t resumeBgScrub<TYPE_MCA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) { PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); - return resumeBgScrub<TYPE_MCBIST>(getConnectedParent(i_chip, TYPE_MCBIST)); + return resumeBgScrub<TYPE_MCBIST>(getConnectedParent(i_chip, TYPE_MCBIST), + io_sc); } //############################################################################## @@ -362,7 +380,8 @@ uint32_t __resumeScrub<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint32_t resumeBgScrub<TYPE_MBA>( ExtensibleChip * i_chip ) +uint32_t resumeBgScrub<TYPE_MBA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) { PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); @@ -418,19 +437,21 @@ uint32_t stopBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) uint32_t rc = SUCCESS; - /* TODO RTC 207273 - no HWP support yet + #ifdef CONFIG_AXONE + fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt ( i_chip->getTrgt() ); errlHndl_t errl; - FAPI_INVOKE_HWP( errl, mss::memdiags::stop, fapiTrgt ); + FAPI_INVOKE_HWP( errl, exp_stop, fapiTrgt ); if ( nullptr != errl ) { - PRDF_ERR( PRDF_FUNC "mss::memdiags::stop(0x%08x) failed", i_chip->getHuid()); + PRDF_ERR( PRDF_FUNC "exp_stop(0x%08x) failed", i_chip->getHuid()); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); rc = FAIL; } - */ + + #endif return rc; @@ -440,19 +461,8 @@ uint32_t stopBgScrub<TYPE_OCMB_CHIP>( 
ExtensibleChip * i_chip ) //------------------------------------------------------------------------------ template<> -uint32_t stopBgScrub<TYPE_MEM_PORT>( ExtensibleChip * i_chip ) -{ - PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); - - ExtensibleChip* ocmbChip = getConnectedParent( i_chip, TYPE_OCMB_CHIP ); - return stopBgScrub<TYPE_OCMB_CHIP>( ocmbChip ); -} - -//------------------------------------------------------------------------------ - -template<> -uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[PlatServices::resumeBgScrub<TYPE_OCMB_CHIP>] " @@ -461,9 +471,9 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) uint32_t o_rc = SUCCESS; - /* TODO RTC 207273 - no hwp support yet + #ifdef CONFIG_AXONE - // Get the OCMB_CHIP fapi target + // Get the OCMB fapi target fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt ( i_chip->getTrgt() ); do @@ -477,13 +487,45 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) break; } + // Check UE and CE stop counters to determine stop conditions + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER> stopCond; + if ( getOcmbDataBundle(i_chip)->iv_ueStopCounter.thReached(io_sc) ) + { + // If we've reached the limit of UEs we're allowed to stop on + // per rank, only set the stop on mpe stop condition. 
+ stopCond.set_pause_on_mpe(mss::ON); + } + else if ( getOcmbDataBundle(i_chip)->iv_ceStopCounter.thReached(io_sc) ) + { + // If we've reached the limit of CEs we're allowed to stop on + // per rank, set all the normal stop conditions except stop on CE + stopCond.set_pause_on_aue(mss::ON); + + #ifdef CONFIG_HBRT_PRD + + stopCond.set_pause_on_mpe(mss::ON) + .set_pause_on_ue(mss::ON); + + // In MNFG mode, stop on RCE_ETE to get an accurate callout for IUEs + if ( mfgMode() ) stopCond.set_thresh_rce(1); + + #endif + } + else + { + // If we haven't reached threshold on the number of UEs or CEs we + // have stopped on, do not change the stop conditions. + stopCond = mss::mcbist::stop_conditions<mss::mc_type::EXPLORER>( + mss::mcbist::stop_conditions<mss::mc_type::EXPLORER>::DONT_CHANGE ); + } + // Resume the command on the next address. errlHndl_t errl; - FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt ); - + FAPI_INVOKE_HWP( errl, exp_continue_cmd, fapiTrgt, + mss::mcbist::end_boundary::DONT_CHANGE, stopCond ); if ( nullptr != errl ) { - PRDF_ERR( PRDF_FUNC "mss::memdiags::continue_cmd(0x%08x) failed", + PRDF_ERR( PRDF_FUNC "exp_continue_cmd(0x%08x) failed", i_chip->getHuid() ); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; break; @@ -491,25 +533,13 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) } while (0); - */ + #endif return o_rc; #undef PRDF_FUNC } -//------------------------------------------------------------------------------ - -template<> -uint32_t resumeBgScrub<TYPE_MEM_PORT>( ExtensibleChip * i_chip ) -{ - PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); - - ExtensibleChip* ocmbChip = getConnectedParent( i_chip, TYPE_OCMB_CHIP ); - return resumeBgScrub<TYPE_OCMB_CHIP>( ocmbChip ); -} - //############################################################################## //## Line Delete Functions //############################################################################## 
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.H b/src/usr/diag/prdf/plat/prdfPlatServices_rt.H index 5407c94ad..49d4c0a73 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.H @@ -30,7 +30,6 @@ #include <p9_l2err_extract.H> #include <p9_pm_callout.H> #include <prdfMemAddress.H> -#include <isteps/nvdimm/nvdimm.H> namespace PRDF { @@ -65,22 +64,13 @@ void sendDynMemDeallocRequest( uint64_t i_saddr, uint64_t i_eaddr ); */ void sendPredDeallocRequest( uint64_t i_saddr, uint64_t i_eaddr ); -/** - * @brief Notify PHYP of NVDIMM protection status - * - * @param i_target Processor with NVDIMM - * @param i_state Protection state of NVDIMM - */ -uint32_t nvdimmNotifyPhypProtChange( TARGETING::Target * i_target, - const NVDIMM::nvdimm_protection_t i_state ); - //############################################################################## //## Nimbus/Centaur Maintenance Command wrappers //############################################################################## /** * @brief Stops Background Scrubbing. - * @param i_chip MCBIST, MCA, MBA, MEM_PORT, or OCMB chip. + * @param i_chip MCBIST, MCA, MBA, or OCMB chip. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ template<TARGETING::TYPE T> @@ -99,11 +89,13 @@ uint32_t stopBgScrub( ExtensibleChip * i_chip ); * due to an error. It should not be called after executing a Targeted * Diagnotics procedure. * - * @param i_chip MCBIST, MCA, MBA, MEM_PORT, or OCMB chip. + * @param i_chip MCBIST, MCA, MBA, or OCMB chip. + * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ template<TARGETING::TYPE T> -uint32_t resumeBgScrub( ExtensibleChip * i_chip ); +uint32_t resumeBgScrub( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); /** * @brief Resumes TD scrubbing after it has paused on error. 
diff --git a/src/usr/diag/prdf/prdfMain_ipl.C b/src/usr/diag/prdf/prdfMain_ipl.C index b73356575..755206b1e 100644 --- a/src/usr/diag/prdf/prdfMain_ipl.C +++ b/src/usr/diag/prdf/prdfMain_ipl.C @@ -42,10 +42,10 @@ #include <prdfCenMbaDataBundle.H> #include <prdfPlatServices.H> #include <prdfP9McaDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemBgScrub.H> // Custom compile configs -#include <config.h> #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS #include <prdfFileRegisterAccess.H> @@ -98,6 +98,11 @@ int32_t analyzeIplCEStats( TargetHandle_t i_trgt, bool &o_calloutMade ) MbaDataBundle * db = getMbaDataBundle( chip ); o_calloutMade = db->getIplCeStats()->analyzeStats(); } + else if ( TYPE_OCMB_CHIP == type ) + { + OcmbDataBundle * db = getOcmbDataBundle( chip ); + o_calloutMade = db->getIplCeStats()->analyzeStats(); + } else { PRDF_ERR( PRDF_FUNC "Unsupported target type %d", type ); @@ -155,6 +160,8 @@ errlHndl_t startScrub( const TargetHandle_t i_trgt ) { case TYPE_MBA: startInitialBgScrub<TYPE_MBA>( chip); break; case TYPE_MCBIST: startInitialBgScrub<TYPE_MCBIST>(chip); break; + case TYPE_OCMB_CHIP: + startInitialBgScrub<TYPE_OCMB_CHIP>(chip); break; default: PRDF_ERR( PRDF_FUNC "Unsupported maintenance target type " "0x%02x", chip->getType() ); diff --git a/src/usr/diag/prdf/prdf_hb_only.mk b/src/usr/diag/prdf/prdf_hb_only.mk index 72ef8880d..6217c6b40 100644 --- a/src/usr/diag/prdf/prdf_hb_only.mk +++ b/src/usr/diag/prdf/prdf_hb_only.mk @@ -75,10 +75,20 @@ prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/pm/ prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/lib/ prd_incpath += ${ROOTPATH}/src/import/generic/memory/lib/utils/ prd_incpath += ${ROOTPATH}/src/import/chips/p9/utils/imageProcs/ +prd_incpath += ${ROOTPATH}/src/import/chips/common/utils/ prd_incpath += ${ROOTPATH}/src/import/chips/common/utils/imageProcs/ prd_incpath += ${ROOTPATH}/src/import/hwpf/fapi2/include prd_incpath += ${ROOTPATH}/src/import/ prd_incpath += 
${ROOTPATH}/src/import/chips/centaur/procedures/hwp/io/ +prd_incpath += ${ROOTPATH}/src/usr/isteps/nvdimm + +# For including hwp_wrappers.H +prd_incpath += ${ROOTPATH}/src/import/generic/memory/lib/prd/ +prd_incpath += ${ROOTPATH}/src/import/generic/memory/lib/utils/mcbist/ +prd_incpath += ${ROOTPATH}/src/import/chips/ocmb/explorer/common/include/ +prd_incpath += ${ROOTPATH}/src/import/chips/ocmb/explorer/procedures/hwp/memory/ +prd_incpath += ${ROOTPATH}/obj/genfiles/chips/ocmb/explorer/procedures/hwp/memory/lib/ +prd_incpath += ${ROOTPATH}/obj/genfiles/generic/memory/lib/ ################################################################################ # Hostboot only object files common to both IPL and runtime @@ -130,12 +140,6 @@ ifeq (${HOSTBOOT_RUNTIME},1) # plat/ prd_obj += prdfPlatServices_rt.o -# nvdimm -prd_vpath += ${ROOTPATH}/src/usr/isteps/nvdimm/ -prd_vpath += ${ROOTPATH}/src/usr/isteps/nvdimm/runtime -prd_obj_no_sim += nvdimm.o -prd_obj_no_sim += nvdimm_rt.o - endif ################################################################################ @@ -190,6 +194,19 @@ prd_obj_no_sim += p9c_dimmBadDqBitmapFuncs.o prd_obj_no_sim += p9c_query_channel_failure.o prd_obj_no_sim += p9c_mss_rowRepairFuncs.o +prd_vpath += ${ROOTPATH}/src/import/chips/ocmb/explorer/procedures/hwp/memory/lib/ +prd_vpath += ${ROOTPATH}/src/import/chips/ocmb/explorer/procedures/hwp/memory/lib/eff_config/ +prd_vpath += ${ROOTPATH}/src/import/chips/ocmb/explorer/procedures/hwp/memory/lib/mcbist/ +prd_vpath += ${ROOTPATH}/src/import/chips/ocmb/explorer/procedures/hwp/memory/lib/utils +prd_vpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/memory/lib/utils/ +prd_obj_no_sim += hwp_wrappers_nim.o +prd_obj_no_sim += hwp_wrappers_exp.o +prd_obj_no_sim += nimbus_pos.o +prd_obj_no_sim += explorer_pos.o +prd_obj_no_sim += exp_mcbist.o +prd_obj_no_sim += exp_memdiags.o +prd_obj_no_sim += explorer_memory_size.o + 
################################################################################ # The following are hardware procedure utilities that we are pulling into the # PRD library (only needed here for HBRT). This code is already compiled in diff --git a/src/usr/diag/prdf/runtime/makefile b/src/usr/diag/prdf/runtime/makefile index 8f7338756..970ce9333 100644 --- a/src/usr/diag/prdf/runtime/makefile +++ b/src/usr/diag/prdf/runtime/makefile @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2014,2018 +# Contributors Listed Below - COPYRIGHT 2014,2019 # [+] International Business Machines Corp. # # @@ -38,13 +38,16 @@ include ../prdf_hb_only.mk # Will define PRD_SRC_PATH and PRD_INC_PATH include ../common/prdf_common_fsp_and_hb.mk include ../common/framework/prdf_framework.mk include ../common/plat/p9/prdf_plat_p9.mk +include ../common/plat/axone/prdf_plat_axone.mk include ../common/plat/cen/prdf_plat_cen.mk include ../common/plat/mem/prdf_plat_mem.mk include ../common/plat/centaur/prdf_plat_centaur.mk include ../common/plat/cumulus/prdf_plat_cumulus.mk include ../common/plat/nimbus/prdf_plat_nimbus.mk +include ../common/plat/explorer/prdf_plat_explorer.mk include ../plat/cen/prdf_plat_cen_hb_only.mk include ../plat/mem/prdf_plat_mem_hb_only.mk +include ../plat/explorer/prdf_plat_explorer_hb_only.mk include ../plat/p9/prdf_plat_p9_hb_only.mk VPATH += ${prd_vpath} diff --git a/src/usr/diag/prdf/test/prdfTest_BadDqBitmap.H b/src/usr/diag/prdf/test/prdfTest_BadDqBitmap.H new file mode 100644 index 000000000..4b4fa8fea --- /dev/null +++ b/src/usr/diag/prdf/test/prdfTest_BadDqBitmap.H @@ -0,0 +1,227 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/test/prdfTest_BadDqBitmap.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. 
*/ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __TEST_PRDFBADDQBITMAP_H +#define __TEST_PRDFBADDQBITMAP_H + +/** + * @file prdfTest_BadDqBitmap.H + * + * @brief prdf testing reading and writing the BAD_DQ_BITMAP attribute + */ + +#ifdef __HOSTBOOT_MODULE + #include <cxxtest/TestSuite.H> + #include <errl/errlentry.H> + #include <errl/errlmanager.H> +#else + #include <cxxtest/TestSuite.h> + #include <fsp/FipsGlobalFixture.H> + #include <errlentry.H> +#endif + +#include <prdfTrace.H> +#include <prdfMain.H> +#include "prdfsimMacros.H" +#include <prdfMemDqBitmap.H> +#include <prdfPlatServices.H> +#include <prdfTargetServices.H> + +class WriteBadDqBitmap: public CxxTest::TestSuite +{ + +public: + + void TestNimbusReadWriteBadDqBitmap(void) + { + using namespace PRDF; + using namespace TARGETING; + using namespace PlatServices; + + TargetHandle_t masterProc = nullptr; + targetService().masterProcChipTargetHandle(masterProc); + + // Nimbus only test + if ( MODEL_NIMBUS == masterProc->getAttr<ATTR_MODEL>() ) + { + TS_INFO("- TestNimbusReadWriteBadDqBitmap - Start -"); + + uint32_t rc = SUCCESS; + + // Get an MCBIST + TargetHandle_t mcb = getConnectedChild(masterProc, TYPE_MCBIST, 0); + if ( nullptr == mcb ) + { + TS_FAIL( "ERROR: Failed to get MCBIST" ); + } + // Get an MCA + TargetHandle_t mca = getConnectedChild( mcb, TYPE_MCA, 0 ); + if ( nullptr == mca ) + { + TS_FAIL( 
"ERROR: Failed to get MCA" ); + } + + // Make arbitrary initial data + MemRank rank( 0, 0 ); + const uint8_t initialBitmap[DQ_BITMAP::BITMAP_SIZE] = + { 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0x00 }; + BitmapData initialData; + memcpy( initialData[0].bitmap, initialBitmap, + sizeof(initialData[0].bitmap) ); + + // Set with the initial data + MemDqBitmap setBitmap( mca, rank, initialData ); + rc = setBadDqBitmap( mca, rank, setBitmap ); + if ( SUCCESS != rc ) + { + TS_FAIL( "ERROR: setBadDqBitmap failed " ); + } + + // Read the data back + MemDqBitmap getBitmap; + rc = getBadDqBitmap( mca, rank, getBitmap ); + if ( SUCCESS != rc ) + { + TS_FAIL( "ERROR: getBadDqBitmap failed" ); + } + + BitmapData newData = getBitmap.getData(); + + // Compare the read data to the initial data. The last byte (byte 9) + // is for spares so we won't worry about comparing that. + for ( uint8_t n = 0; n < (DQ_BITMAP::BITMAP_SIZE-1); n++ ) + { + if ( newData.at(0).bitmap[n] != initialBitmap[n] ) + { + TS_FAIL( "TestNimbusReadWriteBadDqBitmap: Incorrect data " + "found. 
newData[%d]=0x%x initialBitmap[%d]=0x%x", + n, newData.at(0).bitmap[n], n, initialBitmap[n] ); + } + } + + // Clear the vpd just in case + rc = clearBadDqBitmap( mca, rank ); + if ( SUCCESS != rc ) + { + TS_FAIL( "ERROR: clearBadDqBitmap failed" ); + } + + TS_INFO("- TestNimbusReadWriteBadDqBitmap - End -"); + } + + } + + void TestAxoneReadWriteBadDqBitmap(void) + { + using namespace PRDF; + using namespace TARGETING; + using namespace PlatServices; + + TargetHandle_t masterProc = nullptr; + targetService().masterProcChipTargetHandle(masterProc); + + // Axone only test + if ( MODEL_AXONE == masterProc->getAttr<ATTR_MODEL>() ) + { + TS_INFO("- TestAxoneReadWriteBadDqBitmap - Start -"); + + uint32_t rc = SUCCESS; + + // Get an OCMB + TargetHandle_t mc = getConnectedChild( masterProc, TYPE_MC, 0 ); + if ( nullptr == mc ) + { + TS_FAIL( "ERROR: Failed to get MC" ); + } + TargetHandle_t omic = getConnectedChild( mc, TYPE_OMIC, 0 ); + if ( nullptr == omic ) + { + TS_FAIL( "ERROR: Failed to get OMIC" ); + } + TargetHandle_t omi = getConnectedChild( omic, TYPE_OMI, 0 ); + if ( nullptr == omi ) + { + TS_FAIL( "ERROR: Failed to get OMI" ); + } + TargetHandle_t ocmb = getConnectedChild( omi, TYPE_OCMB_CHIP, 0 ); + if ( nullptr == ocmb ) + { + TS_FAIL( "ERROR: Failed to get OCMB" ); + } + // Make arbitrary initial data + MemRank rank( 0 ); + const uint8_t initialBitmap[DQ_BITMAP::BITMAP_SIZE] = + { 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0x00 }; + BitmapData initialData; + memcpy( initialData[0].bitmap, initialBitmap, + sizeof(initialData[0].bitmap) ); + + // Set with the initial data + MemDqBitmap setBitmap( ocmb, rank, initialData ); + rc = setBadDqBitmap( ocmb, rank, setBitmap ); + if ( SUCCESS != rc ) + { + TS_FAIL( "ERROR: setBadDqBitmap failed " ); + } + + // Read the data back + MemDqBitmap getBitmap; + rc = getBadDqBitmap( ocmb, rank, getBitmap ); + if ( SUCCESS != rc ) + { + TS_FAIL( "ERROR: getBadDqBitmap failed" ); + } + + BitmapData newData = 
getBitmap.getData(); + + // Compare the read data to the initial data. The last byte (byte 9) + // is for spares so we won't worry about comparing that. + for ( uint8_t n = 0; n < (DQ_BITMAP::BITMAP_SIZE-1); n++ ) + { + if ( newData.at(0).bitmap[n] != initialBitmap[n] ) + { + TS_FAIL( "TestAxoneReadWriteBadDqBitmap: Incorrect data " + "found. newData[%d]=0x%x initialBitmap[%d]=0x%x", + n, newData.at(0).bitmap[n], n, initialBitmap[n] ); + } + } + + // Clear the vpd just in case + rc = clearBadDqBitmap( ocmb, rank ); + if ( SUCCESS != rc ) + { + TS_FAIL( "ERROR: clearBadDqBitmap failed" ); + } + + TS_INFO("- TestAxoneReadWriteBadDqBitmap - End -"); + + } + + } + +//------------------------------------------------------------------------------ + +}; +#endif diff --git a/src/usr/diag/prdf/test/prdf_hb_common_test.mk b/src/usr/diag/prdf/test/prdf_hb_common_test.mk index a148e0c24..5d1e7ea36 100755 --- a/src/usr/diag/prdf/test/prdf_hb_common_test.mk +++ b/src/usr/diag/prdf/test/prdf_hb_common_test.mk @@ -70,6 +70,7 @@ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/cache/ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/pm/ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/lib/ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils/imageProcs +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/common/utils EXTRAINCDIR += ${ROOTPATH}/src/import/chips/common/utils/imageProcs EXTRAINCDIR += ${ROOTPATH}/src/import/hwpf/fapi2/include EXTRAINCDIR += ${ROOTPATH}/src/import/ @@ -91,8 +92,9 @@ TESTS += ${PRD_USR_TEST_PATH}/prdfTest.H TESTS += ${PRD_USR_TEST_PATH}/prdfTest_XBus.H TESTS += ${PRD_USR_TEST_PATH}/prdfTest_ABus.H TESTS += ${PRD_USR_TEST_PATH}/prdfTest_ProcCentFir.H +TESTS += ${PRD_USR_TEST_PATH}/prdfTest_BadDqBitmap.H TESTS += ${PRD_USR_TEST_PATH}/prdfTest_Ex.H -TESTS += $(if $(CONFIG_AXONE_BRING_UP),,${PRD_USR_TEST_PATH}/prdfTest_NimbusTpLFir.H) +TESTS += ${PRD_USR_TEST_PATH}/prdfTest_NimbusTpLFir.H #@TODO RTC:178802 #TESTS += 
${PRD_USR_TEST_PATH}/prdfTest_Mcs.H diff --git a/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.C b/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.C index 3e3079883..d6b02b5ee 100755 --- a/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.C +++ b/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -43,7 +43,7 @@ SimScomAccessor::~SimScomAccessor() uint32_t SimScomAccessor::Access(TARGETING::TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const + RegisterAccess::Operation operation) const { PRDF_DENTER("SimScomAccessor::Access()"); uint32_t rc = SUCCESS; @@ -53,8 +53,8 @@ uint32_t SimScomAccessor::Access(TARGETING::TargetHandle_t i_target, { switch (operation) { - case MopRegisterAccess::WRITE: l_op = ScrDB::WRITE; break; - case MopRegisterAccess::READ: l_op = ScrDB::READ; break; + case RegisterAccess::WRITE: l_op = ScrDB::WRITE; break; + case RegisterAccess::READ: l_op = ScrDB::READ; break; default: PRDF_ERR( "SimScomAccessor::Access() unsupported operation: 0x%X", operation ); rc = PRD_SCANCOM_FAILURE; diff --git a/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.H b/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.H index b8a610f75..f5566eb54 100755 --- a/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.H +++ b/src/usr/diag/prdf/test/prdfsimHomRegisterAccess.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -73,7 +73,7 @@ class SimScomAccessor : public ScomAccessor virtual uint32_t Access(TARGETING::TargetHandle_t i_target, BitString & bs, uint64_t registerId, - MopRegisterAccess::Operation operation) const; + RegisterAccess::Operation operation) const; private: diff --git a/src/usr/diag/prdf/test/prdfsimScrDB.C b/src/usr/diag/prdf/test/prdfsimScrDB.C index 6308ba423..a6a67bd9c 100755 --- a/src/usr/diag/prdf/test/prdfsimScrDB.C +++ b/src/usr/diag/prdf/test/prdfsimScrDB.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -24,7 +24,7 @@ /* IBM_PROLOG_END_TAG */ #include "prdfsimScrDB.H" -#include <iipMopRegisterAccess.h> +#include <prdfHomRegisterAccess.H> #include <prdfTrace.H> #include <prdfPlatServices.H> #include "prdfsimServices.H" |