diff options
author | Jacob Harvey <jlharvey@us.ibm.com> | 2017-06-01 16:24:50 -0500 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2017-06-30 00:17:15 -0400 |
commit | d0a8f18502ab3d62238aa4945068cd7c79d75ebc (patch) | |
tree | bf0609bd1c6547ae20eda5b63e82aafa739ded97 /src | |
parent | 0e89cd3c6848c78d1b9d954e8936110beca76271 (diff) | |
download | talos-hostboot-d0a8f18502ab3d62238aa4945068cd7c79d75ebc.tar.gz talos-hostboot-d0a8f18502ab3d62238aa4945068cd7c79d75ebc.zip |
Set HB to ignore draminit_training fails
Change-Id: I92bd5cdc52adad0a1414fb61ec6d215d3c51165e
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/41484
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com>
Reviewed-by: STEPHEN GLANCY <sglancy@us.ibm.com>
Reviewed-by: Louis Stermole <stermole@us.ibm.com>
Dev-Ready: JACOB L. HARVEY <jlharvey@us.ibm.com>
Reviewed-by: Jennifer A. Stofer <stofer@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/41492
Reviewed-by: Hostboot Team <hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H | 1 | ||||
-rw-r--r-- | src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C | 80 |
2 files changed, 60 insertions, 21 deletions
```diff
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
index d660c4631..93cadbe0b 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
@@ -517,6 +517,7 @@ fapi_try_exit:
 /// @param[in] i_target the target
 /// @param[in] i_state the state
 /// @return FAPI2_RC_SUCCESS if and only if ok
+/// @note Disable Port Fail after recurring RCD errors.
 ///
 template< fapi2::TargetType T, typename TT = portTraits<T> >
 fapi2::ReturnCode change_port_fail_disable( const fapi2::Target<T>& i_target, states i_state )
diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
index 61f551320..f2ee76321 100644
--- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
+++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
@@ -35,6 +35,7 @@
 
 #include <fapi2.H>
 #include <mss.H>
+#include <vector>
 
 #include <p9_mss_draminit_training.H>
 #include <lib/utils/count_dimm.H>
@@ -61,8 +62,10 @@ extern "C"
             const uint8_t i_abort_on_error)
     {
         // Keep track of the last error seen by a port
-        fapi2::ReturnCode l_port_error = fapi2::FAPI2_RC_SUCCESS;
-        fapi2::buffer<uint32_t> l_cal_steps_enabled = i_special_training;
+        fapi2::ReturnCode l_port_error ( fapi2::FAPI2_RC_SUCCESS );
+        fapi2::buffer<uint32_t> l_cal_steps_enabled( i_special_training );
+
+        std::vector<fapi2::ReturnCode> l_fails;
 
         FAPI_INF("Start draminit training");
 
@@ -170,44 +173,79 @@ extern "C"
                 }
 
                 // Execute selected cal steps
-                FAPI_TRY( mss::setup_and_execute_cal(p, rp, l_cal_steps_enabled, i_abort_on_error) );
+                FAPI_TRY( mss::setup_and_execute_cal(p, rp, l_cal_steps_enabled, l_cal_abort_on_error) );
+
+                fapi2::ReturnCode l_rc (fapi2::current_err);
 
-                // If we're aborting on error we can just FAPI_TRY. If we're not, we don't want to exit if there's
+                // If we're aborting on error we can just jump to the end.
+                // If we're not, we don't want to exit if there's
                 // an error but we want to log the error and keep on keeping on.
-                if ((fapi2::current_err = mss::process_initial_cal_errors(p)) != fapi2::FAPI2_RC_SUCCESS)
+                if ((l_rc = mss::process_initial_cal_errors(p)) != fapi2::FAPI2_RC_SUCCESS)
                 {
-                    fapi2::logError(fapi2::current_err);
-
                     if (l_cal_abort_on_error)
                     {
-                        goto fapi_try_exit;
+                        FAPI_TRY( l_rc );
                     }
 
+                    l_fails.push_back(l_rc);
+
                     // Keep tack of the last cal error we saw.
-                    l_rank_pair_error = fapi2::current_err;
+                    l_rank_pair_error = l_rc;
                 }
-            }// rank pairs
 
-            // Conducts workarounds after training if needed
-            FAPI_TRY( mss::workarounds::dp16::post_training_workarounds( p, l_cal_steps_enabled ) );
+            }// rank pairs
 
-            // Once we've trained all the rank pairs we can record the bad bits in the attributes if we have an error
-            // This error is the most recent error seen on a port, too, so we keep track of that.
-            if (l_rank_pair_error != fapi2::FAPI2_RC_SUCCESS)
             {
-                FAPI_TRY( mss::dp16::record_bad_bits(p) );
-                l_port_error = l_rank_pair_error;
+                fapi2::ReturnCode l_rc (fapi2::FAPI2_RC_SUCCESS);
+                // Conducts workarounds after training if needed
+                l_rc = mss::workarounds::dp16::post_training_workarounds( p, l_cal_steps_enabled );
+
+                if ( l_rc != fapi2::FAPI2_RC_SUCCESS)
+                {
+                    l_fails.push_back(l_rc);
+                }
+
+                // Going to treat bad_bits errors as similar to training errors
+                // If we're in hostboot, we update the attribute and keep running
+                // If we're cronus, we'll error out
+                l_rc = mss::dp16::record_bad_bits(p);
+
+                if ( l_rc != fapi2::FAPI2_RC_SUCCESS)
+                {
+                    l_fails.push_back(l_rc);
+                }
             }
+
+            // Resetting current_err.
+            // The error has either already been "logged" or we have exited and returned the error up the call stack.
+            fapi2::current_err = fapi2::FAPI2_RC_SUCCESS;
         }
 
-        // So we're calibrated the entire port. If we're here either we didn't have any errors or the last error
-        // seen on a port is the error for this entire controller.
-        FAPI_TRY(l_port_error, "Seeing port error, exiting training");
+// So we want to record the errors as informational and not mess with current_err
+#ifdef __HOSTBOOT_MODULE
+
+        for (auto l_iter = l_fails.begin(); l_iter != l_fails.end(); ++l_iter)
+        {
+            // fapi2 doesn't have INFO flag, so the RECOVERED flag will do
+            // Same behavior (no printouts to the custonmer and no deconfigures/ fail outs)
+            // We want to have these fail logs for the future, but we'll let memdiags catch the errors
+            fapi2::logError(*l_iter, fapi2::FAPI2_ERRL_SEV_RECOVERED);
+        }
+
+// If we're in cronus, we're just going to bomb out. Error logging doesn't work as of 6/17 JLH
+// The errors should be printed out as FAPI_ERR's when the ReturnCode was made though
+#else
+        {
+            if (l_fails.size() != 0)
+            {
+                FAPI_TRY(l_fails[0]);
+            }
+        }
+#endif
 
         // Unmask FIR
         FAPI_TRY( mss::unmask::after_draminit_training(i_target) );
 
-
     fapi_try_exit:
         FAPI_INF("End draminit training");
         return fapi2::current_err;
```