diff options
author | Christian Geddes <crgeddes@us.ibm.com> | 2019-09-11 10:19:58 -0500 |
---|---|---|
committer | Daniel M Crowell <dcrowell@us.ibm.com> | 2019-09-13 10:23:12 -0500 |
commit | 8ab48e774cbea9bfec5461d8fd63b4fc7f249c3b (patch) | |
tree | 6f97325ea17bf577407497fb2948de1c07ef1552 | |
parent | c049efe4425d7dbb7b44964da5372a6b1bdd90aa (diff) | |
download | talos-hostboot-8ab48e774cbea9bfec5461d8fd63b4fc7f249c3b.tar.gz talos-hostboot-8ab48e774cbea9bfec5461d8fd63b4fc7f249c3b.zip |
Apply MC_HANG timeout workaround to all processors
When we initially added this workaround, for whatever reason we
specified the master proc only. It needs to be applied to all
processors in the system.
Change-Id: I5840f2b0670f1790393a385f045ee656ad76bef8
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83594
Reviewed-by: Michael Baiocchi <mbaiocch@us.ibm.com>
Reviewed-by: Zachary Clark <zach@ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
-rw-r--r-- | src/usr/isteps/istep10/call_proc_chiplet_scominit.C | 104 |
1 files changed, 45 insertions, 59 deletions
diff --git a/src/usr/isteps/istep10/call_proc_chiplet_scominit.C b/src/usr/isteps/istep10/call_proc_chiplet_scominit.C index ad2f2c7f5..91c98f2c7 100644 --- a/src/usr/isteps/istep10/call_proc_chiplet_scominit.C +++ b/src/usr/isteps/istep10/call_proc_chiplet_scominit.C @@ -73,7 +73,7 @@ void* call_proc_chiplet_scominit( void *io_pArgs ) IStepError l_stepError; TRACFCOMP(g_trac_isteps_trace, ENTER_MRK"call_proc_chiplet_scominit entry" ); - + do{ if (!INITSERVICE::isSMPWrapConfig()) @@ -109,71 +109,57 @@ void* call_proc_chiplet_scominit( void *io_pArgs ) HWPF_COMP_ID); } } - - // TODO RTC: 213932 Remove workaround to ignore MC channel hang + #ifdef CONFIG_AXONE_BRING_UP TARGETING::TargetHandleList l_cpuTargetList; getAllChips(l_cpuTargetList, TYPE_PROC); - - // - // Identify the master processor - // - TARGETING::Target * l_masterProc = nullptr; - TARGETING::Target * l_masterNode = nullptr; - const bool l_onlyFunctional = true; // Make sure masterproc is functional - l_err = TARGETING::targetService().queryMasterProcChipTargetHandle( - l_masterProc, - l_masterNode, - l_onlyFunctional); - - if(l_err) + // Apply workaround to remove MC hang timeouts because they + // are forcing false negatives to all proc functional chips + for (const auto & l_procChip: l_cpuTargetList) { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "ERROR : call_proc_chiplet_scominit: " - "queryMasterProcChipTargetHandle() returned PLID=0x%x", - l_err->plid() ); - // Create IStep error log and cross reference error that occurred - l_stepError.addErrorDetails(l_err); - // Commit error - errlCommit( l_err, HWPF_COMP_ID ); - break; - } - TARGETING::TargetHandleList l_miTargetList; - TARGETING::getChildAffinityTargets( l_miTargetList, l_masterProc, CLASS_UNIT, TYPE_MI ); - - const uint64_t MCS_TIMEOUT_CONTROL_REG = 0x501081B; - - for(const auto & l_mi : l_miTargetList) - { - uint64_t l_mcsTimeoutControlValue; - size_t l_regSize = sizeof(l_mcsTimeoutControlValue); - l_err = deviceRead(l_mi, 
&l_mcsTimeoutControlValue, l_regSize, - DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG)); - - // Clear bit 33 and re-write the scom register with new value. - // When this bit is cleared it allows extra time for gemini card - // before a channel hang is declared - l_mcsTimeoutControlValue &= ~(1UL << 30); - - l_err = deviceWrite(l_mi, &l_mcsTimeoutControlValue, l_regSize, - DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG)); - - if(l_err) - { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "ERROR : call_proc_chiplet_scominit: " - "deviceWrite on DEVICE_SCOM_ADDRESS MCS_TIMEOUT_CONTROL_REG returned PLID=0x%x", - l_err->plid() ); - // Create IStep error log and cross reference error that occurred - l_stepError.addErrorDetails(l_err); - // Commit error - errlCommit( l_err, HWPF_COMP_ID ); - break; - } + TARGETING::TargetHandleList l_miTargetList; + TARGETING::getChildAffinityTargets( l_miTargetList, l_procChip, CLASS_UNIT, TYPE_MI ); + + const uint64_t MCS_TIMEOUT_CONTROL_REG = 0x501081B; + + for(const auto & l_mi : l_miTargetList) + { + uint64_t l_mcsTimeoutControlValue; + size_t l_regSize = sizeof(l_mcsTimeoutControlValue); + l_err = deviceRead(l_mi, &l_mcsTimeoutControlValue, l_regSize, + DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG)); + + // Clear bit 33 and re-write the scom register with new value. 
+ // When this bit is cleared it allows extra time for gemini card + // before a channel hang is declared + l_mcsTimeoutControlValue &= ~(1UL << 30); + + l_err = deviceWrite(l_mi, &l_mcsTimeoutControlValue, l_regSize, + DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG)); + + if(l_err) + { + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, + "ERROR : call_proc_chiplet_scominit: " + "deviceWrite on DEVICE_SCOM_ADDRESS MCS_TIMEOUT_CONTROL_REG returned PLID=0x%x", + l_err->plid() ); + // error will be handled below + break; + } + } + + if(l_err) + { + // Create IStep error log and cross reference error that occurred + l_stepError.addErrorDetails(l_err); + // Commit error + errlCommit( l_err, HWPF_COMP_ID ); + break; + } } #endif - + }while(0); TRACFCOMP(g_trac_isteps_trace, EXIT_MRK"call_proc_chiplet_scominit exit" ); |