summaryrefslogtreecommitdiffstats
path: root/src/import/chips/p9/procedures
diff options
context:
space:
mode:
authorJacob Harvey <jlharvey@us.ibm.com>2017-08-01 16:11:59 -0500
committerDaniel M. Crowell <dcrowell@us.ibm.com>2017-08-19 22:12:27 -0400
commit11108f43887202522217b92d448880df0fef05e5 (patch)
treee48dcecf359d3ba9fa663fe7604545f8ed1a0d61 /src/import/chips/p9/procedures
parentbb97f80565ac3074de838e2773d1d08e91040775 (diff)
downloadtalos-hostboot-11108f43887202522217b92d448880df0fef05e5.tar.gz
talos-hostboot-11108f43887202522217b92d448880df0fef05e5.zip
Fix draminit_training error logging and unit test
Change-Id: Ie0e00595a9e4a50e8b5aa0a2017e6c6ed2e548c5 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44193 Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com> Dev-Ready: JACOB L. HARVEY <jlharvey@us.ibm.com> Reviewed-by: ANDRE A. MARIN <aamarin@us.ibm.com> Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: MATTHEW I. HICKMAN <matthew.hickman@ibm.com> Reviewed-by: Jennifer A. Stofer <stofer@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44198 Reviewed-by: Hostboot Team <hostboot@us.ibm.com> Reviewed-by: JACOB L. HARVEY <jlharvey@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/import/chips/p9/procedures')
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H38
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C156
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.H24
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C6
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H3
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C92
-rw-r--r--src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml9
7 files changed, 214 insertions, 114 deletions
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
index 9aa3f8e74..34310cc56 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
@@ -44,6 +44,7 @@
#include <lib/utils/num.H>
#include <lib/utils/count_dimm.H>
#include <lib/shared/mss_const.H>
+#include <lib/phy/phy_cntrl.H>
namespace mss
{
@@ -428,6 +429,39 @@ inline uint64_t map_rank_ordinal_from_phy( const fapi2::Target<fapi2::TARGET_TYP
}
///
+/// @brief Maps an ordinal rank pair to its INIT_CAL_ERROR register encoding
+/// @param[in] i_target the fapi2 target of the MCA
+/// @param[in] i_rp rank pair 0-3 (asserted to be less than MAX_RANK_PAIRS)
+/// @param[out] o_encoding a value with only bit (i_rp + INIT_CAL_ERROR_RANK_PAIR) set; written only on success
+/// @return FAPI2_RC_SUCCESS iff i_rp is a valid rank pair and its bit could be set
+/// @note on failure o_encoding is left untouched
+///
+inline fapi2::ReturnCode map_rp_primary_to_init_cal( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target,
+        const uint64_t i_rp,
+        uint64_t& o_encoding)
+{
+    typedef pcTraits<fapi2::TARGET_TYPE_MCA> TT;
+    fapi2::buffer<uint64_t> l_temp;
+
+    // Reject out-of-range rank pairs before computing a bit position from them
+    FAPI_ASSERT( i_rp < MAX_RANK_PAIRS,
+                 fapi2::MSS_INVALID_RANK_PAIR()
+                 .set_RANK_PAIR(i_rp)
+                 .set_MCA_TARGET(i_target)
+                 .set_FUNCTION(MAP_RP_PRIMARY_TO_INIT_CAL),
+                 "Error in map_rp_primary_to_init_cal for %s on rp %lu",
+                 mss::c_str(i_target),
+                 i_rp);
+
+    FAPI_INF("Setting bit for rp %lu", i_rp);
+    // TT::INIT_CAL_ERROR_RANK_PAIR is the bit position offset of the rank pair field in the
+    // register (60 per the original note), so rank pair N is encoded at that offset + N
+    FAPI_TRY( l_temp.setBit(i_rp + TT::INIT_CAL_ERROR_RANK_PAIR) );
+
+    o_encoding = l_temp;
+    return fapi2::FAPI2_RC_SUCCESS;
+fapi_try_exit:
+    return fapi2::current_err;
+}
+///
/// @brief Convert rank indexes in a rank_pair reg value from MC perspective to PHY perspective
/// @tparam T fapi2 Target Type the type of the MC target
/// @param[in] i_target the fapi2 target of the MCA
@@ -1327,8 +1361,8 @@ inline fapi2::ReturnCode get_ranks_in_pair( const fapi2::Target<T>& i_target,
FAPI_ASSERT( false,
fapi2::MSS_INVALID_RANK_PAIR()
.set_RANK_PAIR(i_rp)
- .set_FUNCTION(GET_RANKS_IN_PAIR)
- .set_MCA_TARGET(i_target),
+ .set_MCA_TARGET(i_target)
+ .set_FUNCTION(GET_RANKS_IN_PAIR),
"%s Invalid number of rankpairs entered. num: %lu max: %lu",
mss::c_str(i_target),
i_rp,
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
index 3d19b1764..5d8aaf71f 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
@@ -531,7 +531,7 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_MC
FAPI_INF("initial cal err: 0x%016llx, rp: 0x%016llx (0x%016llx)", l_errors, l_rank_pairs, uint64_t(l_err_data));
// Check for RDVREF calibration errors. This fapi_try catches scom errors. Any errors from the
- // RDVREF itself are assumed errors only after read centering
+ // RDVREF itself are assumed errors only after read centering (caught in general error above)
FAPI_TRY( dp16::process_rdvref_cal_errors(i_target) );
// WR VREF error processing acts the same as the RD VREF processing
@@ -539,7 +539,7 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_MC
if ((l_rank_pairs == 0) || (l_errors == 0))
{
- FAPI_INF("Initial cal - no errors reported");
+ FAPI_INF("Initial cal - no errors reported %s", mss::c_str(i_target));
return fapi2::FAPI2_RC_SUCCESS;
}
@@ -550,38 +550,16 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_MC
// fails ...) Note first_bit_set gives a bit position (0 being left most.) So, the rank
// in question is the bit position minus the position of the 0th rank in the register.
// (the rank bits are bits 60:63, for example, so rank 0 is in position 60)
- FAPI_TRY( mss::rank_pair_primary_to_dimm(i_target, mss::first_bit_set(l_rank_pairs) - TT::INIT_CAL_ERROR_RANK_PAIR,
+ FAPI_TRY( mss::rank_pair_primary_to_dimm(i_target,
+ mss::first_bit_set(l_rank_pairs) - TT::INIT_CAL_ERROR_RANK_PAIR,
l_failed_dimm) );
- FAPI_ERR("initial cal failed for %s", mss::c_str(l_failed_dimm));
- // If we aborted on error, the disable bits aren't complete so we can't make a determination about
- // whether the port is repairable.
- if (cal_abort_on_error == fapi2::ENUM_ATTR_MSS_CAL_ABORT_ON_ERROR_YES)
- {
- FAPI_INF("can't process disable bits as we aborted on error - disable bits might be incomplete");
- }
- else
- {
- // Process the disable bits. The PHY will disable bits that it finds don't work for whatever reason.
- // However, we can handle a number of bad bits without resorting to killing the DIMM. Do the bad
- // bit processing here, and if we can go on and ignore these bad bits, we'll see a succcess here.
- if (dp16::process_bad_bits(i_target, l_failed_dimm, l_rank_pairs) == fapi2::FAPI2_RC_SUCCESS)
- {
- FAPI_INF("Initial cal - errors reported, but only 1 nibble + 1 bit marked %s", mss::c_str(l_failed_dimm));
-
- // If we're on a pre-DD1.02 Nimbus, Anuwat requests we 'pass' training with 1 nibble + 1 bit.
- if (mss::chip_ec_feature_mss_training_bad_bits(i_target))
- {
- return fapi2::FAPI2_RC_SUCCESS;
- }
- }
- }
// So we can do a few things here. If we're aborting on the first calibration error,
// we only expect to have one error bit set. If we ran all the calibrations, we can
// either have one bit set or more than one bit set. If we have more than one bit set
- // the result is the same - a broken DIMM which will be deconfigured. So put enough
- // information in the FFDC for the lab but we don't need one error for every cal fail.
+ // the result is the same - a broken DIMM.
+ // So put enough information in the FFDC for the lab but we don't need one error for every cal fail.
FAPI_ASSERT(mss::bit_count(l_errors) == 1,
fapi2::MSS_DRAMINIT_TRAINING_MULTIPLE_ERRORS()
.set_FAILED_STEPS(uint64_t(l_err_data))
@@ -718,6 +696,128 @@ fapi_try_exit:
}
///
+/// @brief Finds the calibration errors from draminit training for one rank pair and records the failures
+/// @param[in] i_target the port target
+/// @param[in] i_rp the rank pair we are calibrating
+/// @param[in] i_cal_abort_on_error denoting if we aborted on first fail
+/// @param[in,out] io_fails a vector storing all of our cal fails; a failing return code is appended unless it was downgraded to recovered
+/// @return FAPI2_RC_SUCCESS iff all of the scoms and functionality were good; the training error itself when set to abort on error
+///
+template<>
+fapi2::ReturnCode find_and_log_cal_errors(const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target,
+ const uint64_t i_rp,
+ const uint64_t i_cal_abort_on_error,
+ std::vector<fapi2::ReturnCode>& io_fails)
+{
+ fapi2::ReturnCode l_rc (fapi2::FAPI2_RC_SUCCESS);
+ fapi2::Target<TARGET_TYPE_DIMM> l_dimm;
+
+ // Let's get the DIMM since we train per rank pair (primary rank pair)
+ FAPI_TRY( mss::rank_pair_primary_to_dimm(i_target,
+ i_rp,
+ l_dimm),
+ "Failed getting the DIMM for %s", mss::c_str(i_target) );
+
+ // Let's keep track of the error.
+ // We don't want to error out here because we want to run on the other ports/ranks
+ // We'll add this to io_fails below unless the failure gets downgraded to recovered
+ l_rc = mss::process_initial_cal_errors(i_target);
+
+ if (l_rc != fapi2::FAPI2_RC_SUCCESS)
+ {
+ // If we're aborting on error we jump to the end and error out.
+ // We don't care about other ports or ranks because the hardware stopped when it saw the error
+ if (i_cal_abort_on_error)
+ {
+ FAPI_TRY( l_rc, "Training failed for %s. Set to abort on error, so cal didn't finish",
+ mss::c_str(l_dimm) );
+ }
+
+ // Process the disable bits. The PHY will disable bits that it finds don't work for whatever reason.
+ // However, we can handle a number of bad bits without resorting to killing the DIMM. Do the bad
+ // bit processing here, and if we can go on and ignore these bad bits, we'll see a success here.
+ // Needs to be bit representation for process_bad_bits (it can handle fails for multiple rp for 1 dimm)
+ uint64_t l_encoding = 0;
+ FAPI_TRY( mss::rank::map_rp_primary_to_init_cal(i_target, i_rp, l_encoding) );
+
+ if (dp16::process_bad_bits(i_target, l_dimm, l_encoding) == fapi2::FAPI2_RC_SUCCESS)
+ {
+ // If the chip has the training_bad_bits EC feature (Nimbus), lab team requests we 'pass' training with 1 nibble + 1 bit
+ if (mss::chip_ec_feature_mss_training_bad_bits(i_target))
+ {
+ FAPI_INF("p9_mss_draminit_training: errors reported, but 1 nibble + 1 bit or less was marked.%s",
+ mss::c_str(l_dimm));
+
+ // Let's log the error as RECOVERED (logs should be hidden and no deconfigs take place) - JLH
+ fapi2::logError(l_rc, fapi2::FAPI2_ERRL_SEV_RECOVERED);
+ // Set l_rc to success so we will still record the bad bits into the attribute
+ l_rc = fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+
+ // Trace the outcome: l_rc is SUCCESS here only if the failure was downgraded to recovered above
+ FAPI_ERR("Seeing calibration errors for p9_mss_draminit_training %s: Keep running? %s",
+ mss::c_str(l_dimm),
+ (l_rc == fapi2::FAPI2_RC_SUCCESS) ? "Yes" : "no");
+
+ // Let's update the attribute with the failing DQ bits since we had a training error
+ // The only fail we get here is a scom error, so we should error out
+ // We only want to update the attribute for hostboot runs though
+ // Updating the attribute updates the DIMM's VPD and actually disabled those DQ bits for good
+ // Commenting out until PRD has the backside implementation complete
+#ifdef __HOSTBOOT_MODULE
+ // TODO RTC:178400 Come back and use the ATTR_BAD_BITS accessor functions from PRD when available
+ //FAPI_TRY( mss::dp16::record_bad_bits(i_target) );
+#endif
+
+ // Let's add the error to our vector for later processing (only if it was not downgraded to recovered above)
+ if (l_rc != fapi2::FAPI2_RC_SUCCESS)
+ {
+ io_fails.push_back(l_rc);
+ }
+ }
+
+ // Calling process_bad_bits above sets fapi2::current_err; Need to explicitly return SUCCESS here
+ return fapi2::FAPI2_RC_SUCCESS;
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Handle draminit_training cal fails
+/// @param[in] i_fails vector holding the return codes for calibration failures
+/// @return hostboot: FAPI2_RC_SUCCESS once every failure has been logged;
+/// cronus: the first entry of i_fails, or FAPI2_RC_SUCCESS when i_fails is empty
+/// @note We handle errors differently depending on if we're HB or cronus
+/// If we're cronus, we want to error out.
+/// If we're hostboot, we want to log the error as hidden and let PRD choose to deconfigure
+///
+fapi2::ReturnCode draminit_training_error_handler( const std::vector<fapi2::ReturnCode>& i_fails)
+{
+// If we're in hostboot, we want to log all of the errors as hidden
+// and let PRD deconfigure based off of ATTR_BAD_DQ_BITMAP
+#ifdef __HOSTBOOT_MODULE
+
+    // Iterate by value on purpose: i_fails is const and logError is handed a mutable ReturnCode
+    // NOTE(review): presumably logError takes its ReturnCode by non-const reference - confirm
+    for (auto l_iter : i_fails)
+    {
+        fapi2::logError(l_iter, fapi2::FAPI2_ERRL_SEV_RECOVERED);
+    }
+
+    // Every failure was logged as recovered, so report success explicitly.
+    // fapi2::current_err is a global this function never sets; returning it could leak a stale error
+    return fapi2::FAPI2_RC_SUCCESS;
+
+// If we're cronus, let's bomb out
+#else
+
+    if (!i_fails.empty())
+    {
+        // We can't log errors in cronus, so let's take the first one and end the IPL
+        FAPI_ERR("Failed p9_mss_draminit_training");
+        return i_fails[0];
+    }
+
+    // Nothing failed
+    return fapi2::FAPI2_RC_SUCCESS;
+
+#endif
+}
+
+
+///
/// @brief Sets up the IO impedances (ADR DRV's and DP DRV's/RCV's) - MCA specialization
/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA/MCBIST or MBA?)
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.H b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.H
index 1c27efccc..39db82147 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.H
@@ -426,6 +426,30 @@ fapi2::ReturnCode rank_pair_primary_to_dimm(const fapi2::Target<T>& i_target, co
fapi2::Target<fapi2::TARGET_TYPE_DIMM>& o_dimm);
///
+/// @brief Handles the calibration failures collected during draminit_training
+/// @param[in] i_fails vector holding the return codes for calibration failures
+/// @note We handle errors differently depending on if we're HB or cronus
+/// If we're cronus, we want to error out (the first entry of i_fails is returned).
+/// If we're hostboot, we want to log each error as hidden (recovered) and let PRD choose to deconfigure
+///
+fapi2::ReturnCode draminit_training_error_handler ( const std::vector<fapi2::ReturnCode>& i_fails);
+
+///
+/// @brief Finds the calibration errors from draminit training for one rank pair
+/// @tparam T fapi2::TargetType of the port target
+/// @param[in] i_target the port target
+/// @param[in] i_rp the rank pair we are calibrating
+/// @param[in] i_cal_abort_on_error denoting if we aborted on first fail
+/// @param[in,out] io_fails a vector storing all of our cal fails; failures found here are appended to it
+/// @return FAPI2_RC_SUCCESS iff all of the scoms and functionality were good
+///
+template<fapi2::TargetType T>
+fapi2::ReturnCode find_and_log_cal_errors(const fapi2::Target<T>& i_target,
+ const uint64_t i_rp,
+ const uint64_t i_cal_abort_on_error,
+ std::vector<fapi2::ReturnCode>& io_fails);
+
+///
/// @brief Sets up the IO impedances (ADR DRV's and DP DRV's/RCV's)
/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA/MCBIST or MBA?)
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
index d9a7fb8e4..547fd3d54 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
@@ -2662,12 +2662,12 @@ fapi_try_exit:
fapi2::ReturnCode record_bad_bits( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target )
{
const auto& l_mcs = mss::find_target<TARGET_TYPE_MCS>(i_target);
- uint8_t l_value[PORTS_PER_MCS][MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] = { 0 };
+ uint8_t l_value[PORTS_PER_MCS][MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT] = { 0 };
// Process the bad bits into an array. We copy these in to their own array
// as it allows the compiler to check indexes where a passed pointer wouldn't
// otherwise do.
- uint8_t l_data[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] = { 0 };
+ uint8_t l_data[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT] = { 0 };
FAPI_TRY( mss::dp16::record_bad_bits_helper(i_target, l_data) );
// Read the attribute
@@ -2732,7 +2732,7 @@ fapi2::ReturnCode record_bad_bits_helper( const fapi2::Target<fapi2::TARGET_TYPE
l_bad_bits[l_byte_index] = (v.first & 0xFF00) >> 8;
l_bad_bits[l_byte_index + 1] = v.first & 0x00FF;
- FAPI_DBG("writing %s value 0x%0lX to 0x%X, 0x%X from 0x%016lx",
+ FAPI_DBG("%s Recording ATTR_BAD_DQ_BITMAP value 0x%0lX to 0x%X, 0x%X from 0x%016lx",
mss::c_str(i_target),
v.first,
l_bad_bits[l_byte_index],
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
index 2214abb40..82fe0bc61 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
@@ -145,6 +145,9 @@ enum ffdc_function_codes
// MSS_INVALID_INDEX_PASSED
SYMBOL_COUNT_READ = 50,
SYMBOL_COUNT_WRITE = 51,
+
+ // Used in rank.H
+ MAP_RP_PRIMARY_TO_INIT_CAL = 60,
};
enum states
diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
index 94d63b82b..b440ebd9a 100644
--- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
+++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
@@ -63,6 +63,7 @@ extern "C"
{
// Keep track of the last error seen by a port
fapi2::ReturnCode l_port_error ( fapi2::FAPI2_RC_SUCCESS );
+
fapi2::buffer<uint32_t> l_cal_steps_enabled( i_special_training );
std::vector<fapi2::ReturnCode> l_fails;
@@ -78,8 +79,15 @@ extern "C"
}
uint8_t l_reset_disable = 0;
+ uint8_t l_cal_abort_on_error = i_abort_on_error;
FAPI_TRY( mss::mrw_reset_delay_before_cal(l_reset_disable), "%s Error in p9_mss_draminit_training",
mss::c_str(i_target) );
+ // Flag to abort on error
+
+ if (i_abort_on_error == CAL_ABORT_SENTINAL)
+ {
+ FAPI_TRY( mss::cal_abort_on_error(l_cal_abort_on_error) );
+ }
// Configure the CCS engine.
{
@@ -104,9 +112,6 @@ extern "C"
for( const auto& p : mss::find_targets<TARGET_TYPE_MCA>(i_target))
{
- // Keep track of the last error seen by a rank pair
- fapi2::ReturnCode l_rank_pair_error(fapi2::FAPI2_RC_SUCCESS);
-
// Returned from set_rank_pairs, it tells us how many rank pairs
// we configured on this port.
std::vector<uint64_t> l_pairs;
@@ -156,95 +161,34 @@ extern "C"
FAPI_TRY( mss::dp16::reset_delay_values(p, l_pairs), "Error in p9_mss_draminit_training" );
}
- FAPI_DBG("generating calibration CCS instructions: %d rank-pairs", l_pairs.size());
+ FAPI_DBG("generating calibration CCS instructions: %d rank-pairs %s", l_pairs.size(), mss::c_str(p));
// Turn on refresh for training
- FAPI_TRY( mss::workarounds::dqs_align::turn_on_refresh(p), "Error in p9_mss_draminit_training" );
+ FAPI_TRY( mss::workarounds::dqs_align::turn_on_refresh(p), "Error in p9_mss_draminit_training %s", mss::c_str(p) );
// For each rank pair we need to calibrate, pop a ccs instruction in an array and execute it.
// NOTE: IF YOU CALIBRATE MORE THAN ONE RANK PAIR PER CCS PROGRAM, MAKE SURE TO CHANGE
// THE PROCESSING OF THE ERRORS. (it's hard to figure out which DIMM failed, too) BRS.
for (const auto& rp : l_pairs)
{
- uint8_t l_cal_abort_on_error = i_abort_on_error;
-
- if (i_abort_on_error == CAL_ABORT_SENTINAL)
- {
- FAPI_TRY( mss::cal_abort_on_error(l_cal_abort_on_error), "Error in p9_mss_draminit_training" );
- }
-
- // Execute selected cal steps
- FAPI_TRY( mss::setup_and_execute_cal(p, rp, l_cal_steps_enabled, l_cal_abort_on_error),
- "Error in p9_mss_draminit_training" );
-
- fapi2::ReturnCode l_rc (fapi2::current_err);
-
- // If we're aborting on error we can just jump to the end.
- // If we're not, we don't want to exit if there's
- // an error but we want to log the error and keep on keeping on.
- if ((l_rc = mss::process_initial_cal_errors(p)) != fapi2::FAPI2_RC_SUCCESS)
- {
- if (l_cal_abort_on_error)
- {
- FAPI_TRY( l_rc );
- }
-
- l_fails.push_back(l_rc);
+ FAPI_INF("Execute cal on rp %d %s", rp, mss::c_str(p));
- // Keep tack of the last cal error we saw.
- l_rank_pair_error = l_rc;
- }
+ FAPI_TRY( mss::setup_and_execute_cal(p, rp, l_cal_steps_enabled, l_cal_abort_on_error) );
+ FAPI_TRY( mss::find_and_log_cal_errors(p, rp, l_cal_abort_on_error, l_fails) );
}// rank pairs
{
- fapi2::ReturnCode l_rc (fapi2::FAPI2_RC_SUCCESS);
// Conducts workarounds after training if needed
- l_rc = mss::workarounds::dp16::post_training_workarounds( p, l_cal_steps_enabled );
-
- if ( l_rc != fapi2::FAPI2_RC_SUCCESS)
- {
- l_fails.push_back(l_rc);
- }
-
- // Going to treat bad_bits errors as similar to training errors
- // If we're in hostboot, we update the attribute and keep running
- // If we're cronus, we'll error out
- l_rc = mss::dp16::record_bad_bits(p);
-
- if ( l_rc != fapi2::FAPI2_RC_SUCCESS)
- {
- l_fails.push_back(l_rc);
- }
- }
-
- // Resetting current_err.
- // The error has either already been "logged" or we have exited and returned the error up the call stack.
- fapi2::current_err = fapi2::FAPI2_RC_SUCCESS;
- }
+ // If we get fails here, it's due to scom errors
+ FAPI_TRY( mss::workarounds::dp16::post_training_workarounds( p, l_cal_steps_enabled ));
-// So we want to record the errors as informational and not mess with current_err
-#ifdef __HOSTBOOT_MODULE
-
- for (auto l_iter = l_fails.begin(); l_iter != l_fails.end(); ++l_iter)
- {
- // fapi2 doesn't have INFO flag, so the RECOVERED flag will do
- // Same behavior (no printouts to the custonmer and no deconfigures/ fail outs)
- // We want to have these fail logs for the future, but we'll let memdiags catch the errors
- fapi2::logError(*l_iter, fapi2::FAPI2_ERRL_SEV_RECOVERED);
- }
-
-// If we're in cronus, we're just going to bomb out. Error logging doesn't work as of 6/17 JLH
-// The errors should be printed out as FAPI_ERR's when the ReturnCode was made though
-#else
- {
- if (l_fails.size() != 0)
- {
- FAPI_TRY(l_fails[0]);
}
}
-#endif
+ // Let's handle the cal fails on the MCBIST
+ // We do it here in order to train every port
+ FAPI_TRY( mss::draminit_training_error_handler(l_fails) );
// Unmask FIR
FAPI_TRY( mss::unmask::after_draminit_training(i_target), "Error in p9_mss_draminit" );
diff --git a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml
index 7bb746450..811dffb3f 100644
--- a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml
+++ b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml
@@ -571,7 +571,8 @@
<hwpError>
<rc>RC_MSS_FAILED_RDVREF_CAL</rc>
<description>
- A DP16 has failed read vREF calibration
+ A DP16 has failed read vREF calibration.
+ If this fails, RDCNTR cal will also catch the fail
</description>
<ffdc>REGISTER</ffdc>
<ffdc>VALUE</ffdc>
@@ -589,12 +590,6 @@
<target>MCA_TARGET</target>
<priority>HIGH</priority>
</callout>
- <deconfigure>
- <target>MCA_TARGET</target>
- </deconfigure>
- <gard>
- <target>MCA_TARGET</target>
- </gard>
</hwpError>
<hwpError>
OpenPOWER on IntegriCloud