summaryrefslogtreecommitdiffstats
path: root/src/import/chips
diff options
context:
space:
mode:
authorStephen Glancy <sglancy@us.ibm.com>2017-09-21 10:14:04 -0500
committerDaniel M. Crowell <dcrowell@us.ibm.com>2017-10-02 23:45:51 -0400
commit3890040afa1dc93d58476d68df35cb44d49c57b2 (patch)
tree1c1538e4b8bb40a8a9d65bc8e900a84bb2caa586 /src/import/chips
parentf21a18e501c28d932ee24f11a7a3ffaa93228735 (diff)
downloadtalos-hostboot-3890040afa1dc93d58476d68df35cb44d49c57b2.tar.gz
talos-hostboot-3890040afa1dc93d58476d68df35cb44d49c57b2.zip
Updates error paths for PRD FIR checking
FIR's could cause errors within hardware procedures. PRD has the capability to retrigger a procedure if it sees an error. We might be able to avoid IPL issues with this, so if a FIR has been hit during hardware enabled code (CCS or calibration), then log the error and let PRD find the "new" FIR that could have caused the hardware engine to have an issue. If there is some other problem, the retriggered HWP will find it.

Change-Id: I81599d1d0c4b4c256b79820b4a7e2eafc09e206b
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46571
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: JACOB L. HARVEY <jlharvey@us.ibm.com>
Reviewed-by: Louis Stermole <stermole@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: HWSV CI <hwsv-ci+hostboot@us.ibm.com>
Reviewed-by: ANDRE A. MARIN <aamarin@us.ibm.com>
Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com>
Reviewed-by: Jennifer A. Stofer <stofer@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46584
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/import/chips')
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C7
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C2
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C16
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H4
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C202
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H147
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H2
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C4
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C11
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C42
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H1
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C18
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C2
13 files changed, 432 insertions, 26 deletions
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C
index b82951221..669048513 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C
@@ -37,6 +37,7 @@
#include <mss.H>
#include <lib/ccs/ccs.H>
+#include <lib/fir/check.H>
using fapi2::TARGET_TYPE_MCBIST;
using fapi2::TARGET_TYPE_MCA;
@@ -83,6 +84,7 @@ fapi2::ReturnCode fail_type( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target,
const uint64_t& i_type,
const fapi2::Target<TARGET_TYPE_MCA>& i_mca )
{
+ fapi2::ReturnCode l_failing_rc(fapi2::FAPI2_RC_SUCCESS);
// Including the MCA_TARGET here and below at CAL_TIMEOUT since these problems likely lie at the MCA level
// So we disable the PORT and hopefully that's it
// If the problem lies with the MCBIST, it'll just have to loop
@@ -112,7 +114,10 @@ fapi2::ReturnCode fail_type( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target,
fapi2::MSS_CCS_HUNG().set_MCBIST_TARGET(i_target),
"%s CCS appears hung", mss::c_str(i_target));
fapi_try_exit:
- return fapi2::current_err;
+ // Due to the PRD update, we need to check for FIR's
+ // If any FIR's have lit up, this CCS fail could have been caused by the FIR
+ // So, let PRD retrigger this step to see if we can resolve the issue
+ return mss::check::fir_or_pll_fail(i_target, fapi2::current_err);
}
///
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C
index 43694ac2d..cec455f6a 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C
@@ -64,7 +64,7 @@ fapi2::ReturnCode mrs_engine( const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_ta
const uint64_t i_rank,
std::vector< ccs::instruction_t<fapi2::TARGET_TYPE_MCBIST> >& io_inst )
{
- FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) );
+ FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) );
fapi_try_exit:
return fapi2::current_err;
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C
index 6404adf0b..f2edb7873 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C
@@ -233,7 +233,6 @@ fapi_try_exit:
///
/// @brief Return a vector of rank numbers which represent the primary rank pairs for this port
-/// @tparam T the target type
/// @param[in] i_target TARGET_TYPE_MCA
/// @param[out] o_rps a vector of rank_pairs
/// @return FAPI2_RC_SUCCESS iff all is ok
@@ -251,7 +250,7 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target<TARGET_TYPE_MCA>& i_target,
FAPI_TRY( mss::eff_num_master_ranks_per_dimm(d, l_rank_count[mss::index(d)]) );
}
- FAPI_DBG("ranks: %d, %d", l_rank_count[0], l_rank_count[1]);
+ FAPI_DBG("%s ranks: %d, %d", mss::c_str(i_target), l_rank_count[0], l_rank_count[1]);
// Walk through rank pair table and skip empty pairs
o_rps.clear();
@@ -264,13 +263,15 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target<TARGET_TYPE_MCA>& i_target,
}
}
+ // Returning success in case no DIMM's are configured
+ return fapi2::FAPI2_RC_SUCCESS;
+
fapi_try_exit:
return fapi2::current_err;
}
///
/// @brief Return a vector of rank numbers which represent the primary rank pairs for this dimm
-/// @tparam T the target type
/// @param[in] i_target TARGET_TYPE_DIMM
/// @param[out] o_rps a vector of rank_pairs
/// @return FAPI2_RC_SUCCESS iff all is ok
@@ -344,7 +345,6 @@ fapi_try_exit:
///
/// @brief Given a target, get the rank pair assignments, based on DIMMs
-/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA or MBA?)
/// @param[out] o_registers the register settings for the appropriate rank pairs
/// @return FAPI2_RC_SUCCESS if and only if ok
@@ -382,8 +382,7 @@ fapi_try_exit:
///
/// @brief Setup the rank information in the port
-/// @tparam T the fapi2::TargetType
-/// @param[in] i_target the target (MCA or MBA?)
+/// @param[in] i_target the target (MCA)
/// @return FAPI2_RC_SUCCESS if and only if ok
///
template<>
@@ -485,7 +484,6 @@ fapi_try_exit:
///
/// @brief Get a vector of configured rank pairs.
/// Returns a vector of ordinal values of the configured rank pairs. e.g., for a 2R DIMM, {0, 1}
-/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA or MBA?)
/// @param[out] o_pairs std::vector of rank pairs configured
/// @return FAPI2_RC_SUCCESS if and only if ok
@@ -565,7 +563,6 @@ fapi_try_exit:
///
/// @brief Get a rank-pair id from a physical rank
/// Returns a number representing which rank-pair this rank is a part of
-/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA or MBA?)
/// @param[in] i_rank the physical rank number
/// @param[out] o_pairs the rank pair
@@ -573,7 +570,8 @@ fapi_try_exit:
///
template<>
fapi2::ReturnCode get_pair_from_rank(const fapi2::Target<TARGET_TYPE_MCA>& i_target,
- uint64_t i_rank, uint64_t& o_pair)
+ uint64_t i_rank,
+ uint64_t& o_pair)
{
// Sort of brute-force, but no real good other way to do it. Given the
// rank-pair configuration we walk the config looking for our rank, and
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
index 34310cc56..e5b3b9041 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
@@ -1060,7 +1060,7 @@ inline fapi2::ReturnCode set_pair_valid( const fapi2::Target<T>& i_target,
fapi2::MSS_INVALID_RANK()
.set_RANK(i_rank)
.set_MCA_TARGET(i_target)
- .set_FUNCTION(GET_RANKS_IN_PAIR),
+ .set_FUNCTION(SET_PAIR_VALID),
"%s Invalid rank (%d) in get_ranks_in_pair",
mss::c_str(i_target),
i_rank);
@@ -1231,7 +1231,7 @@ fapi2::ReturnCode get_ranks_in_pair( const fapi2::Target<T>& i_target,
// Get data
for (uint64_t l_ordinal = 0; l_ordinal < TT::NUM_RANKS_IN_PAIR; ++l_ordinal)
{
- // Check to make sure rank is vlaid
+ // Check to make sure rank is valid
FAPI_ASSERT( l_ordinal < MAX_RANK_PER_DIMM,
fapi2::MSS_INVALID_RANK()
.set_RANK(l_ordinal)
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
index 58f1f0d94..7a329aaed 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
@@ -36,6 +36,8 @@
#include <fapi2.H>
#include <p9_mc_scom_addresses.H>
#include <p9_mc_scom_addresses_fld.H>
+#include <p9_perv_scom_addresses.H>
+#include <p9_perv_scom_addresses_fld.H>
#include <generic/memory/lib/utils/scom.H>
#include <lib/fir/fir.H>
@@ -205,6 +207,9 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target<fapi2::TARGET_TY
fapi2::buffer<uint64_t> l_phyfir_data;
fapi2::buffer<uint64_t> l_phyfir_masked;
+ // If we have a FIR that is lit up, we want to see if it could have been caused by a more drastic FIR
+ bool l_check_fir = false;
+
FAPI_TRY( mss::getScom(l_mca, MCA_IOM_PHY0_DDRPHY_FIR_REG, l_phyfir_data) );
l_phyfir_masked = l_phyfir_data & l_phyfir_mask;
@@ -213,6 +218,8 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target<fapi2::TARGET_TY
// We'll have the error log to know what fir bit triggered and when, so we should be fine clearing here
FAPI_TRY( mss::putScom(l_mca, MCA_IOM_PHY0_DDRPHY_FIR_REG_AND, l_phyfir_mask.invert()) );
+ // Check the FIR here
+ l_check_fir = true;
FAPI_ASSERT( l_phyfir_masked == 0,
fapi2::MSS_DRAMINIT_TRAINING_PORT_FIR()
.set_PHY_FIR(l_phyfir_masked)
@@ -222,8 +229,203 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target<fapi2::TARGET_TY
mss::c_str(i_target), l_phyfir_masked);
fapi_try_exit:
+
+ // Handle any fails seen above accordingly
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_check_fir);
+}
+
+// Declares FIR registers that are re-used between multiple functions
+// Vectors of FIR and mask registers to read through
+// As check_fir can be called in multiple places, we don't know what the mask may hold
+// In order to tell if a FIR is legit or not, we read the FIR and check it against the mask reg
+// Note: using a vector here in case we need to expand
+static const std::vector<std::pair<uint64_t, uint64_t>> MCBIST_FIR_REGS =
+{
+ // MCBIST FIR
+ {MCBIST_MCBISTFIRQ, MCBIST_MCBISTFIRMASK},
+};
+
+static const std::vector<std::pair<uint64_t, uint64_t>> MCA_FIR_REGS =
+{
+ // MCA ECC FIR
+ {MCA_FIR, MCA_MASK},
+ // MCA CAL FIR
+ {MCA_MBACALFIRQ, MCA_MBACALFIR_MASK},
+ // DDRPHY FIR
+ {MCA_IOM_PHY0_DDRPHY_FIR_REG, MCA_IOM_PHY0_DDRPHY_FIR_MASK_REG},
+};
+
+///
+/// @brief Checks whether any of the PLL unlock values are set
+/// @param[in] i_local_fir - the overall FIR register
+/// @param[in] i_perv_fir - the pervasive PLL FIR
+/// @param[in] i_mc_fir - the memory controller FIR
+/// @return true iff the local FIR bit is set and the MC or any TP PLL unlock bit is on, false otherwise
+///
+bool pll_unlock( const fapi2::buffer<uint64_t>& i_local_fir,
+ const fapi2::buffer<uint64_t>& i_perv_fir,
+ const fapi2::buffer<uint64_t>& i_mc_fir )
+{
+ // Note: the following registers did not have the scom fields defined, so we're constexpr'ing them here
+ constexpr uint64_t PERV_TP_ERROR_START = 25;
+ constexpr uint64_t PERV_TP_ERROR_LEN = 4;
+ constexpr uint64_t PERV_MC_ERROR_START = 25;
+
+ // No overall FIR (bit 21) was set, so just exit
+ if(!i_local_fir.getBit<PERV_1_LOCAL_FIR_IN21>())
+ {
+ FAPI_INF("Did not have the PERV_LOCAL_FIR bit set. No PLL error, exiting");
+ return false;
+ }
+
+ // Now, identify whether a PLL unlock caused the FIR bit to fail
+ FAPI_INF("PERV_TP_ERROR_REG %s PERV_MC01_ERROR_REG %s",
+ i_perv_fir.getBit<PERV_TP_ERROR_START, PERV_TP_ERROR_LEN>() ? "PLL lock fail" : "PLL ok",
+ i_mc_fir.getBit<PERV_MC_ERROR_START>() ? "PLL lock fail" : "PLL ok");
+
+ // We have a PLL unlock if the MC PLL unlock FIR bit is on or any of the TP PLL unlock bits are on
+ return (i_mc_fir.getBit<PERV_MC_ERROR_START>()) || (i_perv_fir.getBit<PERV_TP_ERROR_START, PERV_TP_ERROR_LEN>());
+}
+
+///
+/// @brief Checks whether any PLL FIRs have been set on a target
+/// @param[in] i_target - the target on which to operate
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+fapi2::ReturnCode pll_fir( const fapi2::Target<fapi2::TARGET_TYPE_MCBIST>& i_target, bool& o_fir_error )
+{
+ // Sets o_fir_error to false to begin with, just in case we have scom issues
+ o_fir_error = false;
+
+ // Gets the processor target
+ const auto& l_proc = mss::find_target<fapi2::TARGET_TYPE_PROC_CHIP>(i_target);
+
+ // Gets the register data
+ fapi2::buffer<uint64_t> l_local_fir;
+ fapi2::buffer<uint64_t> l_perv_fir;
+ fapi2::buffer<uint64_t> l_mc_fir;
+
+ FAPI_TRY(mss::getScom(l_proc, PERV_TP_LOCAL_FIR, l_local_fir), "%s failed to get 0x%016llx", mss::c_str(i_target),
+ PERV_TP_LOCAL_FIR);
+ FAPI_TRY(mss::getScom(l_proc, PERV_TP_ERROR_REG, l_perv_fir), "%s failed to get 0x%016llx", mss::c_str(i_target),
+ PERV_TP_ERROR_REG);
+ FAPI_TRY(mss::getScom(i_target, PERV_MC01_ERROR_REG, l_mc_fir), "%s failed to get 0x%016llx", mss::c_str(i_target),
+ PERV_MC01_ERROR_REG);
+
+ // Checks the data
+ o_fir_error = pll_unlock(l_local_fir, l_perv_fir, l_mc_fir);
+
+fapi_try_exit:
return fapi2::current_err;
}
+///
+/// @brief Checks whether any FIR have lit up
+/// @param[in] i_target - the target on which to operate - MCBIST specialization
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<fapi2::TARGET_TYPE_MCBIST>& i_target, bool& o_fir_error )
+{
+ // Start by assuming we do not have a FIR
+ o_fir_error = false;
+
+ // Loop, check the scoms, and check the FIR
+ // Note: we return out if any FIR is bad
+ for(const auto& l_fir_reg : MCBIST_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+
+ // Loop through all MCA's and all MCA FIR's
+ for(const auto& l_mca : mss::find_targets<fapi2::TARGET_TYPE_MCA>(i_target))
+ {
+ for(const auto& l_fir_reg : MCA_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(l_mca, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+ }
+
+ // Lastly, check for PLL unlocks
+ FAPI_TRY(pll_fir(i_target, o_fir_error));
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+
+///
+/// @brief Checks whether any FIR have lit up
+/// @param[in] i_target - the target on which to operate - MCA specialization
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target, bool& o_fir_error )
+{
+ const auto& l_mcbist = mss::find_target<fapi2::TARGET_TYPE_MCBIST>(i_target);
+ // Start by assuming we do not have a FIR
+ o_fir_error = false;
+
+ // Loop, check the scoms, and check the FIR
+ // Note: we return out if any FIR is bad
+ for(const auto& l_fir_reg : MCBIST_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(l_mcbist, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+
+ // Loop through all MCA FIR's
+ for(const auto& l_fir_reg : MCA_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+
+ // Lastly, check for PLL unlocks
+ FAPI_TRY(pll_fir(l_mcbist, o_fir_error));
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+
+///
+/// @brief Checks whether any FIR have lit up
+/// @param[in] i_target - the target on which to operate - DIMM specialization
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, bool& o_fir_error )
+{
+ const auto l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target);
+ return bad_fir_bits(l_mca, o_fir_error);
+}
+
}
}
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
index ded638e49..fc82aaed1 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
@@ -27,7 +27,7 @@
/// @file check.H
/// @brief Subroutines for checking MSS FIR
///
-// *HWP HWP Owner: Brian Silver <bsilver@us.ibm.com>
+// *HWP HWP Owner: Andre Marin <aamarin@us.ibm.com>
// *HWP HWP Backup: Marc Gollub <gollub@us.ibm.com>
// *HWP Team: Memory
// *HWP Level: 2
@@ -37,6 +37,7 @@
#define _MSS_CHECK_FIR_H_
#include <fapi2.H>
+#include <generic/memory/lib/utils/scom.H>
namespace mss
{
@@ -58,6 +59,7 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target<T>& i_target );
///
/// @brief Check FIR bits during draminit training
+/// @tparam T the fapi2::TargetType which hold the FIR bits
/// @param[in] i_target the dimm that was trained
/// @note We check for fir errors after training each rank
/// to see if there was a problem with the engine
@@ -69,6 +71,149 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target<T>& i_target );
template< fapi2::TargetType T >
fapi2::ReturnCode during_draminit_training( const fapi2::Target<T>& i_target );
+///
+/// @brief Checks whether any of the PLL unlock values are set
+/// @param[in] i_local_fir - the overall FIR register
+/// @param[in] i_perv_fir - the pervasive PLL FIR
+/// @param[in] i_mc_fir - the memory controller FIR
+/// @return true iff the local FIR bit is set and the MC or any TP PLL unlock bit is on, false otherwise
+///
+bool pll_unlock( const fapi2::buffer<uint64_t>& i_local_fir,
+ const fapi2::buffer<uint64_t>& i_perv_fir,
+ const fapi2::buffer<uint64_t>& i_mc_fir );
+
+///
+/// @brief Checks whether any PLL FIRs have been set on a target
+/// @param[in] i_target - the target on which to operate
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+fapi2::ReturnCode pll_fir( const fapi2::Target<fapi2::TARGET_TYPE_MCBIST>& i_target, bool& o_fir_error );
+
+///
+/// @brief Checks whether any FIRs have lit up on a target
+/// @tparam T the fapi2::TargetType which hold the FIR bits
+/// @param[in] i_target - the target on which to operate
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<T>& i_target, bool& o_fir_error );
+
+///
+/// @brief Checks whether the passed in FIRs have any un-masked errors set
+/// @tparam T the fapi2::TargetType which hold the FIR bits
+/// @param[in] i_target - the target on which to operate
+/// @param[in] i_fir_regs - FIR register and mask register
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< fapi2::TargetType T >
+inline fapi2::ReturnCode fir_with_mask( const fapi2::Target<T>& i_target,
+ const std::pair<uint64_t, uint64_t>& i_fir_regs,
+ bool& o_fir_error )
+{
+ // Temporary variables to make the code a bit more readable
+ const auto FIR_REG = i_fir_regs.first;
+ const auto FIR_MASK = i_fir_regs.second;
+
+ fapi2::buffer<uint64_t> l_fir;
+ fapi2::buffer<uint64_t> l_fir_mask;
+
+ // Read the registers
+ FAPI_TRY(mss::getScom(i_target, FIR_REG, l_fir));
+ FAPI_TRY(mss::getScom(i_target, FIR_MASK, l_fir_mask));
+
+
+ // The mask register will need to be inverted as a 0 in the mask register means the FIR is legit
+ // A bitwise and works the opposite way
+ l_fir_mask.invert();
+
+ // If we have any unmasked bit, set that we have a FIR error and exit out with success
+ // Note: we want to set success here as PRD will find the FIR as "new" and retrigger the procedure this way
+ o_fir_error = ((l_fir & l_fir_mask) != 0);
+
+ // And print the information for debuggability
+ FAPI_INF("%s %s on reg 0x%016lx value 0x%016lx and mask 0x%016lx value 0x%016lx", mss::c_str(i_target),
+ o_fir_error ? "has FIR's set" : "has no FIR's set", FIR_REG, l_fir, FIR_MASK, l_fir_mask.invert());
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure
+/// @tparam T the fapi2::TargetType which hold the FIR bits
+/// @param[in] i_target - the target on which to operate
+/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if i_rc is good or a FIR was found (i_rc is then logged as recovered), otherwise i_rc
+/// @note This is a helper function to enable unit testing
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode hostboot_fir_or_pll_fail( const fapi2::Target<T>& i_target, fapi2::ReturnCode& i_rc)
+{
+ // We didn't have an error, so return success
+ if(i_rc == fapi2::FAPI2_RC_SUCCESS)
+ {
+ FAPI_INF("%s has a good return code, returning success", mss::c_str(i_target));
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+
+ fapi2::ReturnCode l_fircheck_scom_err(fapi2::FAPI2_RC_SUCCESS);
+ bool l_fir_error = false;
+
+ FAPI_ERR("%s has a bad return code, time to check some firs!", mss::c_str(i_target));
+
+ l_fircheck_scom_err = bad_fir_bits(i_target, l_fir_error);
+
+ FAPI_ERR("%s took a fail. FIR was %s", mss::c_str(i_target),
+ l_fir_error ? "set - returning FIR RC" : "unset - returning inputted RC");
+
+ // If we had a FIR error, log the original error and return success
+ // PRD will handle the original error
+ if(l_fir_error)
+ {
+ fapi2::log_related_error(i_target, i_rc, fapi2::FAPI2_ERRL_SEV_RECOVERED);
+ fapi2::current_err = fapi2::FAPI2_RC_SUCCESS;
+ }
+ else
+ {
+ fapi2::current_err = i_rc;
+ }
+
+ return fapi2::current_err;
+}
+
+///
+/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure, if a check fir boolean is passed in
+/// @tparam T the fapi2::TargetType which hold the FIR bits
+/// @param[in] i_target - the target on which to operate
+/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue
+/// @param[in] i_check_fir - true IFF the FIR needs to be checked - defaults to true
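+/// @return fapi2::ReturnCode i_rc as passed in, except in hostboot builds with i_check_fir set, where the result of hostboot_fir_or_pll_fail is returned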
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode fir_or_pll_fail( const fapi2::Target<T>& i_target, fapi2::ReturnCode& i_rc,
+ const bool i_check_fir = true)
+{
+#ifdef __HOSTBOOT_MODULE
+
+ fapi2::ReturnCode l_rc(i_rc);
+
+ // If need be, check the FIR below
+ if(i_check_fir)
+ {
+ // Handle any issues according to PRD FIR scheme, as a FIR could have caused this issue
+ l_rc = hostboot_fir_or_pll_fail(i_target, l_rc);
+ }
+
+ return l_rc;
+
+#else
+ return i_rc;
+#endif
+}
+
}
}
#endif
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
index 876a83909..b6c2ece01 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
@@ -964,7 +964,7 @@ fapi2::ReturnCode reset_zqcal_config( const fapi2::Target<T>& i_target )
for (const auto r : l_ranks)
{
- l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + rank::map_rank_ordinal_to_phy(i_target, r));
+ FAPI_TRY(l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + rank::map_rank_ordinal_to_phy(i_target, r)));
}
// Write the ZQCAL periodic config
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C
index 17563fc83..bdee48e3c 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C
@@ -936,7 +936,7 @@ fapi2::ReturnCode xlate_dimm_2R2T8Gbx4( const dimm::kind& i_kind,
// We're basically a 2R 4Gbx4 with an extra row. So lets setup like we're one of those,
// add row 16 and shift the D bit as needed.
- xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2);
+ FAPI_TRY(xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2));
// Tell the MC which of the row bits are valid, and map the DIMM selector
// We're a 17 row DIMM, so ROW16 is valid.
@@ -1941,7 +1941,7 @@ fapi2::ReturnCode setup_xlate_map_helper( std::vector<dimm::kind>& io_dimm_kinds
set_DIMM_TYPE(k.iv_dimm_type).
set_ROWS(k.iv_rows).
set_SIZE(k.iv_size),
- "no address translation funtion for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d",
+ "no address translation function for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d",
mss::c_str(k.iv_target),
k.iv_master_ranks,
k.iv_total_ranks,
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
index 86a8621fa..e1e63fec5 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
@@ -521,6 +521,11 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_DI
uint64_t l_rank_pairs = 0;
uint8_t cal_abort_on_error = 0;
+ // This boolean tells the code whether we took a training fail or a scom fail reading the status registers
+ // It starts as false, given that we need to read out the registers
+ // When we start checking all of the values of the status registers, it gets set to true
+ bool l_check_firs = false;
+
const auto& l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target);
fapi2::buffer<uint64_t> l_err_data;
@@ -550,6 +555,9 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_DI
}
// Error information from other registers is gathered in the FFDC from the XML
+ // From here on out, check the FIRs
+ // Using this boolean to avoid having to check the FIR's after each assert below
+ l_check_firs = true;
// So we can do a few things here. If we're aborting on the first calibration error,
// we only expect to have one error bit set. If we ran all the calibrations, we can
@@ -692,7 +700,8 @@ fapi_try_exit:
(fapi2::current_err == fapi2::FAPI2_RC_SUCCESS ? "success" : "errors reported"),
mss::c_str(l_mca));
- return fapi2::current_err;
+ // Checks the FIR's, if need be
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_check_firs);
}
///
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
index 0e346881a..129c37515 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
@@ -52,6 +52,7 @@
#include <generic/memory/lib/utils/c_str.H>
#include <lib/workarounds/dp16_workarounds.H>
+#include <lib/fir/check.H>
#include <generic/memory/lib/utils/mss_math.H>
using fapi2::TARGET_TYPE_MCS;
@@ -3260,6 +3261,22 @@ fapi_try_exit:
///
fapi2::ReturnCode record_bad_bits( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target )
{
+ // If we have a FIR set that could have caused our training fail, then skip checking bad bits in FW
+ // PRD will handle the FIR and retrigger the procedure
+#ifdef __HOSTBOOT_MODULE
+ bool l_fir_error = false;
+ FAPI_TRY(mss::check::bad_fir_bits(i_target, l_fir_error), "%s took an error while checking FIR's",
+ mss::c_str(i_target));
+
+ // Exit if we took a FIR error - PRD will handle bad bits
+ if(l_fir_error)
+ {
+ FAPI_INF("%s has FIR's set, exiting to let PRD handle it", mss::c_str(i_target));
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+
+#endif
+
for( const auto& d : mss::find_targets<fapi2::TARGET_TYPE_DIMM>(i_target) )
{
uint8_t l_data[MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT] = {};
@@ -3367,11 +3384,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
size_t l_index = 0;
std::vector<fapi2::buffer<uint64_t>> l_data;
+ // Boolean to keep track of if a fail was calibration related, or scom related
+ bool l_cal_fail = false;
+
// Suck all the cal error bits out ...
FAPI_TRY( mss::scom_suckah(l_mca, TT::RD_VREF_CAL_ERROR_REG, l_data) );
FAPI_INF("%s Processing RD_VREF_CAL_ERROR", mss::c_str(i_target));
+ // From here on out, any fails are cal fails (not scom fails), so the FIR's should be checked
+ l_cal_fail = true;
+
for (const auto& v : l_data)
{
// They should all be 0's. If they're not, we have a problem.
@@ -3383,14 +3406,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
.set_VALUE(v),
"DP16 failed read vref calibration on %s. register 0x%016lx value 0x%016lx",
mss::c_str(l_mca), TT::RD_VREF_CAL_ERROR_REG[l_index], v);
+
++l_index;
}
- FAPI_INF("RD_VREF_CAL_ERROR complete");
+ FAPI_INF("%s RD_VREF_CAL_ERROR complete", mss::c_str(i_target));
return fapi2::FAPI2_RC_SUCCESS;
fapi_try_exit:
- return fapi2::current_err;
+
+ // If the fails are cal fails, then check to see if FIRs or PLL fails were the cause
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_cal_fail);
}
///
@@ -3412,10 +3438,16 @@ fapi2::ReturnCode process_wrvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
std::vector<std::pair<fapi2::buffer<uint64_t>, fapi2::buffer<uint64_t>>> l_data;
std::vector<std::pair<fapi2::buffer<uint64_t>, fapi2::buffer<uint64_t>>> l_mask;
+ // Boolean to keep track of if a fail was calibration related, or scom related
+ bool l_cal_fail = false;
+
// Suck all the cal error bits out ...
FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_REG, l_data) );
FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_MASK_REG, l_mask) );
+ // From here on out, any fails are cal fails (not scom fails), so the FIR's should be checked
+ l_cal_fail = true;
+
// Loop through both data and mask
{
// Note: ideally these would be cbegin/cend, but HB doesn't support constant iterators for vectors
@@ -3480,11 +3512,13 @@ fapi2::ReturnCode process_wrvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
}
}
- FAPI_INF("WRVREF_CAL_ERROR complete");
+ FAPI_INF("%s WRVREF_CAL_ERROR complete", mss::c_str(i_target));
return fapi2::FAPI2_RC_SUCCESS;
fapi_try_exit:
- return fapi2::current_err;
+
+ // If the fails are cal fails, then check to see if FIR's were the cause
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_cal_fail);
}
///
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
index cf6a871e5..d6e5c4f53 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
@@ -199,6 +199,7 @@ enum ffdc_functions
RD_CTR_WORKAROUND_READ_DATA = 7,
OVERRIDE_ODT_WR_CONFIG = 8,
RECORD_BAD_BITS_HELPER = 9,
+ SET_PAIR_VALID = 10,
};
// Static consts describing the bits used in the cal_step_enable attribute
// These are bit positions. 0 is the left most bit.
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C b/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C
index 95dbe26f8..a5fa507bb 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C
@@ -47,6 +47,7 @@
#include <lib/phy/phy_cntrl.H>
#include <lib/dimm/rank.H>
#include <lib/utils/bit_count.H>
+#include <lib/fir/check.H>
namespace mss
{
@@ -547,10 +548,13 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target<fapi2::TARGET_TYPE_MC
// If we can't, exit with success
if (! chip_ec_feature_mss_dqs_workaround(i_target) )
{
- FAPI_DBG("Skipping DQS workaround because of ec feature attribute");
+ FAPI_DBG("%s Skipping DQS workaround because of ec feature attribute", mss::c_str(i_target));
return fapi2::FAPI2_RC_SUCCESS;
}
+ // Boolean to keep track of if a fail was calibration related, or scom related
+ bool l_cal_fail = false;
+
FAPI_TRY( eff_dram_width( i_target, l_dram_width) );
l_is_x8 = ((l_dram_width[0] == fapi2::ENUM_ATTR_EFF_DRAM_WIDTH_X8) ||
@@ -603,6 +607,8 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target<fapi2::TARGET_TYPE_MC
// Clear all disable bits - this will cause calibration to re-run everything that failed, including WR LVL fails
FAPI_TRY(mss::workarounds::dp16::dqs_align::reset_disables(i_target, i_rp));
+ // Next, we're checking for CAL fails, so make sure to check the FIR's below
+ l_cal_fail = true;
// If the loop timed out, bomb out
// If this is firmware, they'll log it as info and run to memdiags
@@ -617,11 +623,16 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target<fapi2::TARGET_TYPE_MC
"%s i_rp %lu DQS workaround failed! 10 loops reached without everything passing",
mss::c_str(i_target), i_rp);
+ // Below, the errors are scom related, no need to check the FIR's
+ l_cal_fail = false;
+
// Now plop the delays back in to the registers
FAPI_TRY(mss::workarounds::dp16::dqs_align::set_passing_values( i_target, i_rp, l_passing_values));
fapi_try_exit:
- return fapi2::current_err;
+
+ // If the fails are cal fails, then check to see if FIR's or PLL's could be the cause
+ return mss::check::fir_or_pll_fail(i_target, fapi2::current_err, l_cal_fail);
}
///
@@ -777,7 +788,8 @@ fapi_try_exit:
/// @param[in,out] io_passing_values - the passing values, a map from the DQS number to the value
/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if ok
///
-fapi2::ReturnCode record_passing_values( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target, const uint64_t i_rp,
+fapi2::ReturnCode record_passing_values( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target,
+ const uint64_t i_rp,
std::map<uint64_t, uint64_t>& io_passing_values)
{
// Traits declaration
diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
index 533a53905..b4de8bd90 100644
--- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
+++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
@@ -68,7 +68,7 @@ extern "C"
std::vector<fapi2::ReturnCode> l_fails;
- FAPI_INF("Start draminit training");
+ FAPI_INF("%s Start draminit training", mss::c_str(i_target));
// If there are no DIMM we don't need to bother. In fact, we can't as we didn't setup
// attributes for the PHY, etc.
OpenPOWER on IntegriCloud