summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Stephen Glancy <sglancy@us.ibm.com> 2018-05-11 13:23:06 -0500
committer Dean Sanner <dsanner@us.ibm.com> 2018-05-25 11:23:21 -0400
commit 8ff5d8f2210b5bd95aecf0f2a5e589f0b24ac189 (patch)
tree 475fda343cd4cce920d871d596b5c005afd46a53
parent 2254d9f67acf154c5eecda4627f7f9bf1063fe72 (diff)
download talos-hostboot-8ff5d8f2210b5bd95aecf0f2a5e589f0b24ac189.tar.gz
talos-hostboot-8ff5d8f2210b5bd95aecf0f2a5e589f0b24ac189.zip
Updates Centaur training to continue on fails for FW
Change-Id: I295fbcdcee0691215a8b45ff951842801775b6b3
CQ:SW426968
RTC:192763
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58712
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Dev-Ready: STEPHEN GLANCY <sglancy@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Louis Stermole <stermole@us.ibm.com>
Reviewed-by: ANDRE A. MARIN <aamarin@us.ibm.com>
Reviewed-by: Dean Sanner <dsanner@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58718
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
-rwxr-xr-x src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C 26
-rwxr-xr-x src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C 16
-rw-r--r-- src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml 10
3 files changed, 42 insertions, 10 deletions
diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C
index 5f46f29ad..c079e6d13 100755
--- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C
+++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_training.C
@@ -663,6 +663,8 @@ extern "C" {
uint8_t& io_dqs_try
)
{
+ // Used to determine if the error should be logged as recovered or not
+ bool l_training_error = false;
fapi2::buffer<uint64_t> l_cal_error_buffer_64;
fapi2::buffer<uint64_t> l_disable_bit_data_for_dp18_buffer_64;
uint8_t l_mbaPosition = 0;
@@ -682,7 +684,7 @@ extern "C" {
if(l_cal_error_buffer_64.getBit<CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_RANK_PAIR, CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_RANK_PAIR_LEN>())
{
io_status = MSS_INIT_CAL_FAIL;
-
+ l_training_error = true;
FAPI_ASSERT(!l_cal_error_buffer_64.getBit<CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_WR_LEVEL>(),
fapi2::CEN_MSS_DRAMINIT_TRAINING_WR_LVL_ERROR().
set_MBA_POSITION(l_mbaPosition).
@@ -697,6 +699,7 @@ extern "C" {
// DQS Alignment Work Around:
if (io_dqs_try < MAX_DQS_RETRY)
{
+ l_training_error = false;
++io_dqs_try;
--io_cur_cal_step;
FAPI_INF( "+++ DQS Alignment recovery attempt %d on %s port: %d rank group: %d! +++", io_dqs_try, mss::c_str(i_target),
@@ -721,6 +724,7 @@ extern "C" {
} // if dqs_try < max
else
{
+ l_training_error = true;
FAPI_ASSERT(false,
fapi2::CEN_MSS_DRAMINIT_TRAINING_DQS_ALIGNMENT_ERROR().
set_TARGET_MBA_ERROR(i_target).
@@ -732,6 +736,7 @@ extern "C" {
}
} // if getBit<50>
+ l_training_error = true;
FAPI_ASSERT(!l_cal_error_buffer_64.getBit<CEN_MBA_DDRPHY_PC_INIT_CAL_ERROR_P0_ERROR_RDCLK_ALIGN>(),
fapi2::CEN_MSS_DRAMINIT_TRAINING_RD_CLK_SYS_CLK_ALIGNMENT_ERROR().
set_TARGET_MBA_ERROR(i_target).
@@ -837,7 +842,26 @@ extern "C" {
io_status = MSS_INIT_CAL_PASS;
}
+ return fapi2::FAPI2_RC_SUCCESS;
fapi_try_exit:
+#ifdef __HOSTBOOT_MODULE
+
+ // If we took a training fail, log it as recovered - memdiags will sort it out
+ if(l_training_error)
+ {
+ auto l_temp_rc = fapi2::current_err;
+ fapi2::logError(l_temp_rc, fapi2::FAPI2_ERRL_SEV_RECOVERED);
+ fapi2::current_err = fapi2::FAPI2_RC_SUCCESS;
+ }
+
+#else
+
+ if(l_training_error)
+ {
+ FAPI_ERR("%s error was caused by a training error", mss::c_str(i_target));
+ }
+
+#endif
return fapi2::current_err;
}
diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C
index e22376c1f..e1d70e29d 100755
--- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C
+++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_generic_shmoo.C
@@ -542,6 +542,20 @@ extern "C"
}
//////////////// changed the check condition ... The error call out need to gard the dimm=l_faulted_dimm(0 or 1) //// port=l_faulted_port(0 or 1) target=i_target ...
+
+#ifdef __HOSTBOOT_MODULE
+ FAPI_ASSERT_NOEXIT(!l_memory_health,
+ fapi2::CEN_MSS_GENERIC_SHMOO_MCBIST_FAILED().
+ set_MBA_TARGET(i_target).
+ set_MBA_PORT_NUMBER(l_faulted_port).
+ set_MBA_DIMM_NUMBER(l_faulted_dimm),
+ "generic_shmoo:sanity_check failed !! MCBIST failed on %s initial run , memory is not in good state needs investigation port=%d rank=%d dimm=%d",
+ mss::c_str(i_target),
+ l_faulted_port,
+ l_faulted_rank,
+ l_faulted_dimm);
+
+#else
FAPI_ASSERT(!l_memory_health,
fapi2::CEN_MSS_GENERIC_SHMOO_MCBIST_FAILED().
set_MBA_TARGET(i_target).
@@ -553,6 +567,8 @@ extern "C"
l_faulted_rank,
l_faulted_dimm);
+#endif
+
fapi_try_exit:
return fapi2::current_err;
}
diff --git a/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml b/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml
index e84b96714..2bd50d9f2 100644
--- a/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml
+++ b/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_mss_generic_shmoo_errors.xml
@@ -5,7 +5,7 @@
<!-- -->
<!-- OpenPOWER HostBoot Project -->
<!-- -->
-<!-- Contributors Listed Below - COPYRIGHT 2016,2017 -->
+<!-- Contributors Listed Below - COPYRIGHT 2016,2018 -->
<!-- [+] International Business Machines Corp. -->
<!-- -->
<!-- -->
@@ -50,13 +50,5 @@
</childTargets>
<priority>HIGH</priority>
</callout>
- <deconfigure>
- <childTargets>
- <parent>MBA_TARGET</parent>
- <childType>TARGET_TYPE_DIMM</childType>
- <childPort>MBA_PORT_NUMBER</childPort>
- <childNumber>MBA_DIMM_NUMBER</childNumber>
- </childTargets>
- </deconfigure>
</hwpError>
</hwpErrors>
OpenPOWER on IntegriCloud