diff options
author | Stephen Glancy <sglancy@us.ibm.com> | 2018-05-11 13:23:06 -0500 |
---|---|---|
committer | Dean Sanner <dsanner@us.ibm.com> | 2018-05-25 11:23:21 -0400 |
commit | 8ff5d8f2210b5bd95aecf0f2a5e589f0b24ac189 (patch) | |
tree | 475fda343cd4cce920d871d596b5c005afd46a53 /src/import/chips | |
parent | 2254d9f67acf154c5eecda4627f7f9bf1063fe72 (diff) | |
download | talos-hostboot-8ff5d8f2210b5bd95aecf0f2a5e589f0b24ac189.tar.gz talos-hostboot-8ff5d8f2210b5bd95aecf0f2a5e589f0b24ac189.zip |
Updates Centaur training to continue on failures for FW
Change-Id: I295fbcdcee0691215a8b45ff951842801775b6b3
CQ:SW426968
RTC:192763
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58712
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Dev-Ready: STEPHEN GLANCY <sglancy@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Louis Stermole <stermole@us.ibm.com>
Reviewed-by: ANDRE A. MARIN <aamarin@us.ibm.com>
Reviewed-by: Dean Sanner <dsanner@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58718
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/import/chips')
3 files changed, 42 insertions, 10 deletions
diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C index 5f46f29ad..c079e6d13 100755 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C @@ -663,6 +663,8 @@ extern "C" { uint8_t& io_dqs_try ) { + // Used to determine if the error should be logged as recovered or not + bool l_training_error = false; fapi2::buffer<uint64_t> l_cal_error_buffer_64; fapi2::buffer<uint64_t> l_disable_bit_data_for_dp18_buffer_64; uint8_t l_mbaPosition = 0; @@ -682,7 +684,7 @@ extern "C" { if(l_cal_error_buffer_64.getBit<CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_RANK_PAIR, CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_RANK_PAIR_LEN>()) { io_status = MSS_INIT_CAL_FAIL; - + l_training_error = true; FAPI_ASSERT(!l_cal_error_buffer_64.getBit<CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_WR_LEVEL>(), fapi2::CEN_MSS_DRAMINIT_TRAINING_WR_LVL_ERROR(). set_MBA_POSITION(l_mbaPosition). @@ -697,6 +699,7 @@ extern "C" { // DQS Alignment Work Around: if (io_dqs_try < MAX_DQS_RETRY) { + l_training_error = false; ++io_dqs_try; --io_cur_cal_step; FAPI_INF( "+++ DQS Alignment recovery attempt %d on %s port: %d rank group: %d! +++", io_dqs_try, mss::c_str(i_target), @@ -721,6 +724,7 @@ extern "C" { } // if dqs_try < max else { + l_training_error = true; FAPI_ASSERT(false, fapi2::CEN_MSS_DRAMINIT_TRAINING_DQS_ALIGNMENT_ERROR(). set_TARGET_MBA_ERROR(i_target). @@ -732,6 +736,7 @@ extern "C" { } } // if getBit<50> + l_training_error = true; FAPI_ASSERT(!l_cal_error_buffer_64.getBit<CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_RDCLK_ALIGN>(), fapi2::CEN_MSS_DRAMINIT_TRAINING_RD_CLK_SYS_CLK_ALIGNMENT_ERROR(). set_TARGET_MBA_ERROR(i_target). 
@@ -837,7 +842,26 @@ extern "C" { io_status = MSS_INIT_CAL_PASS; } + return fapi2::FAPI2_RC_SUCCESS; fapi_try_exit: +#ifdef __HOSTBOOT_MODULE + + // If we took a training fail, log it as recovered - memdiags will sort it out + if(l_training_error) + { + auto l_temp_rc = fapi2::current_err; + fapi2::logError(l_temp_rc, fapi2::FAPI2_ERRL_SEV_RECOVERED); + fapi2::current_err = fapi2::FAPI2_RC_SUCCESS; + } + +#else + + if(l_training_error) + { + FAPI_ERR("%s error was caused by a training error", mss::c_str(i_target)); + } + +#endif return fapi2::current_err; } diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C index e22376c1f..e1d70e29d 100755 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C @@ -542,6 +542,20 @@ extern "C" } //////////////// changed the check condition ... The error call out need to gard the dimm=l_faulted_dimm(0 or 1) //// port=l_faulted_port(0 or 1) target=i_target ... + +#ifdef __HOSTBOOT_MODULE + FAPI_ASSERT_NOEXIT(!l_memory_health, + fapi2::CEN_MSS_GENERIC_SHMOO_MCBIST_FAILED(). + set_MBA_TARGET(i_target). + set_MBA_PORT_NUMBER(l_faulted_port). + set_MBA_DIMM_NUMBER(l_faulted_dimm), + "generic_shmoo:sanity_check failed !! MCBIST failed on %s initial run , memory is not in good state needs investigation port=%d rank=%d dimm=%d", + mss::c_str(i_target), + l_faulted_port, + l_faulted_rank, + l_faulted_dimm); + +#else FAPI_ASSERT(!l_memory_health, fapi2::CEN_MSS_GENERIC_SHMOO_MCBIST_FAILED(). set_MBA_TARGET(i_target). 
@@ -553,6 +567,8 @@ extern "C" l_faulted_rank, l_faulted_dimm); +#endif + fapi_try_exit: return fapi2::current_err; } diff --git a/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml b/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml index e84b96714..2bd50d9f2 100644 --- a/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml +++ b/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml @@ -5,7 +5,7 @@ <!-- --> <!-- OpenPOWER HostBoot Project --> <!-- --> -<!-- Contributors Listed Below - COPYRIGHT 2016,2017 --> +<!-- Contributors Listed Below - COPYRIGHT 2016,2018 --> <!-- [+] International Business Machines Corp. --> <!-- --> <!-- --> @@ -50,13 +50,5 @@ </childTargets> <priority>HIGH</priority> </callout> - <deconfigure> - <childTargets> - <parent>MBA_TARGET</parent> - <childType>TARGET_TYPE_DIMM</childType> - <childPort>MBA_PORT_NUMBER</childPort> - <childNumber>MBA_DIMM_NUMBER</childNumber> - </childTargets> - </deconfigure> </hwpError> </hwpErrors> |