summary | refs | log | tree | commit | diff | stats
path: root/src/import
diff options
context:
space:
mode:
Diffstat (limited to 'src/import')
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C | 7
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C | 2
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C | 16
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H | 4
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C | 202
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H | 147
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H | 2
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C | 4
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C | 11
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C | 42
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H | 1
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C | 18
-rw-r--r-- src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C | 2
13 files changed, 432 insertions, 26 deletions
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C
index b82951221..669048513 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C
@@ -37,6 +37,7 @@
#include <mss.H>
#include <lib/ccs/ccs.H>
+#include <lib/fir/check.H>
using fapi2::TARGET_TYPE_MCBIST;
using fapi2::TARGET_TYPE_MCA;
@@ -83,6 +84,7 @@ fapi2::ReturnCode fail_type( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target,
const uint64_t& i_type,
const fapi2::Target<TARGET_TYPE_MCA>& i_mca )
{
+ fapi2::ReturnCode l_failing_rc(fapi2::FAPI2_RC_SUCCESS);
// Including the MCA_TARGET here and below at CAL_TIMEOUT since these problems likely lie at the MCA level
// So we disable the PORT and hopefully that's it
// If the problem lies with the MCBIST, it'll just have to loop
@@ -112,7 +114,10 @@ fapi2::ReturnCode fail_type( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target,
fapi2::MSS_CCS_HUNG().set_MCBIST_TARGET(i_target),
"%s CCS appears hung", mss::c_str(i_target));
fapi_try_exit:
- return fapi2::current_err;
+ // Due to the PRD update, we need to check for FIR's
+ // If any FIR's have lit up, this CCS fail could have been caused by the FIR
+ // So, let PRD retrigger this step to see if we can resolve the issue
+ return mss::check::fir_or_pll_fail(i_target, fapi2::current_err);
}
///
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C
index 43694ac2d..cec455f6a 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C
@@ -64,7 +64,7 @@ fapi2::ReturnCode mrs_engine( const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_ta
const uint64_t i_rank,
std::vector< ccs::instruction_t<fapi2::TARGET_TYPE_MCBIST> >& io_inst )
{
- FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) );
+ FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) );
fapi_try_exit:
return fapi2::current_err;
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C
index 6404adf0b..f2edb7873 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C
@@ -233,7 +233,6 @@ fapi_try_exit:
///
/// @brief Return a vector of rank numbers which represent the primary rank pairs for this port
-/// @tparam T the target type
/// @param[in] i_target TARGET_TYPE_MCA
/// @param[out] o_rps a vector of rank_pairs
/// @return FAPI2_RC_SUCCESS iff all is ok
@@ -251,7 +250,7 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target<TARGET_TYPE_MCA>& i_target,
FAPI_TRY( mss::eff_num_master_ranks_per_dimm(d, l_rank_count[mss::index(d)]) );
}
- FAPI_DBG("ranks: %d, %d", l_rank_count[0], l_rank_count[1]);
+ FAPI_DBG("%s ranks: %d, %d", mss::c_str(i_target), l_rank_count[0], l_rank_count[1]);
// Walk through rank pair table and skip empty pairs
o_rps.clear();
@@ -264,13 +263,15 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target<TARGET_TYPE_MCA>& i_target,
}
}
+ // Returning success in case no DIMM's are configured
+ return fapi2::FAPI2_RC_SUCCESS;
+
fapi_try_exit:
return fapi2::current_err;
}
///
/// @brief Return a vector of rank numbers which represent the primary rank pairs for this dimm
-/// @tparam T the target type
/// @param[in] i_target TARGET_TYPE_DIMM
/// @param[out] o_rps a vector of rank_pairs
/// @return FAPI2_RC_SUCCESS iff all is ok
@@ -344,7 +345,6 @@ fapi_try_exit:
///
/// @brief Given a target, get the rank pair assignments, based on DIMMs
-/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA or MBA?)
/// @param[out] o_registers the register settings for the appropriate rank pairs
/// @return FAPI2_RC_SUCCESS if and only if ok
@@ -382,8 +382,7 @@ fapi_try_exit:
///
/// @brief Setup the rank information in the port
-/// @tparam T the fapi2::TargetType
-/// @param[in] i_target the target (MCA or MBA?)
+/// @param[in] i_target the target (MCA)
/// @return FAPI2_RC_SUCCESS if and only if ok
///
template<>
@@ -485,7 +484,6 @@ fapi_try_exit:
///
/// @brief Get a vector of configured rank pairs.
/// Returns a vector of ordinal values of the configured rank pairs. e.g., for a 2R DIMM, {0, 1}
-/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA or MBA?)
/// @param[out] o_pairs std::vector of rank pairs configured
/// @return FAPI2_RC_SUCCESS if and only if ok
@@ -565,7 +563,6 @@ fapi_try_exit:
///
/// @brief Get a rank-pair id from a physical rank
/// Returns a number representing which rank-pair this rank is a part of
-/// @tparam T the fapi2::TargetType
/// @param[in] i_target the target (MCA or MBA?)
/// @param[in] i_rank the physical rank number
/// @param[out] o_pairs the rank pair
@@ -573,7 +570,8 @@ fapi_try_exit:
///
template<>
fapi2::ReturnCode get_pair_from_rank(const fapi2::Target<TARGET_TYPE_MCA>& i_target,
- uint64_t i_rank, uint64_t& o_pair)
+ uint64_t i_rank,
+ uint64_t& o_pair)
{
// Sort of brute-force, but no real good other way to do it. Given the
// rank-pair configuration we walk the config looking for our rank, and
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
index 34310cc56..e5b3b9041 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H
@@ -1060,7 +1060,7 @@ inline fapi2::ReturnCode set_pair_valid( const fapi2::Target<T>& i_target,
fapi2::MSS_INVALID_RANK()
.set_RANK(i_rank)
.set_MCA_TARGET(i_target)
- .set_FUNCTION(GET_RANKS_IN_PAIR),
+ .set_FUNCTION(SET_PAIR_VALID),
"%s Invalid rank (%d) in get_ranks_in_pair",
mss::c_str(i_target),
i_rank);
@@ -1231,7 +1231,7 @@ fapi2::ReturnCode get_ranks_in_pair( const fapi2::Target<T>& i_target,
// Get data
for (uint64_t l_ordinal = 0; l_ordinal < TT::NUM_RANKS_IN_PAIR; ++l_ordinal)
{
- // Check to make sure rank is vlaid
+ // Check to make sure rank is valid
FAPI_ASSERT( l_ordinal < MAX_RANK_PER_DIMM,
fapi2::MSS_INVALID_RANK()
.set_RANK(l_ordinal)
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
index 58f1f0d94..7a329aaed 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
@@ -36,6 +36,8 @@
#include <fapi2.H>
#include <p9_mc_scom_addresses.H>
#include <p9_mc_scom_addresses_fld.H>
+#include <p9_perv_scom_addresses.H>
+#include <p9_perv_scom_addresses_fld.H>
#include <generic/memory/lib/utils/scom.H>
#include <lib/fir/fir.H>
@@ -205,6 +207,9 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target<fapi2::TARGET_TY
fapi2::buffer<uint64_t> l_phyfir_data;
fapi2::buffer<uint64_t> l_phyfir_masked;
+ // If we have a FIR that is lit up, we want to see if it could have been caused by a more drastic FIR
+ bool l_check_fir = false;
+
FAPI_TRY( mss::getScom(l_mca, MCA_IOM_PHY0_DDRPHY_FIR_REG, l_phyfir_data) );
l_phyfir_masked = l_phyfir_data & l_phyfir_mask;
@@ -213,6 +218,8 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target<fapi2::TARGET_TY
// We'll have the error log to know what fir bit triggered and when, so we should be fine clearing here
FAPI_TRY( mss::putScom(l_mca, MCA_IOM_PHY0_DDRPHY_FIR_REG_AND, l_phyfir_mask.invert()) );
+ // Check the FIR here
+ l_check_fir = true;
FAPI_ASSERT( l_phyfir_masked == 0,
fapi2::MSS_DRAMINIT_TRAINING_PORT_FIR()
.set_PHY_FIR(l_phyfir_masked)
@@ -222,8 +229,203 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target<fapi2::TARGET_TY
mss::c_str(i_target), l_phyfir_masked);
fapi_try_exit:
+
+ // Handle any fails seen above accordingly
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_check_fir);
+}
+
+// FIR register tables that are re-used by the bad_fir_bits specializations in this file
+// Each entry is a {FIR register address, matching mask register address} pair
+// As the FIR check can be called in multiple places, we don't know what the mask may hold
+// In order to tell if a FIR is legit or not, we read the FIR and check it against the mask reg
+// Note: using a vector here in case we need to expand
+static const std::vector<std::pair<uint64_t, uint64_t>> MCBIST_FIR_REGS =
+{
+ // MCBIST FIR and its mask register
+ {MCBIST_MCBISTFIRQ, MCBIST_MCBISTFIRMASK},
+};
+
+static const std::vector<std::pair<uint64_t, uint64_t>> MCA_FIR_REGS = // {FIR register, mask register} pairs read per MCA
+{
+ // MCA ECC FIR and its mask register
+ {MCA_FIR, MCA_MASK},
+ // MCA CAL FIR and its mask register
+ {MCA_MBACALFIRQ, MCA_MBACALFIR_MASK},
+ // DDRPHY FIR and its mask register
+ {MCA_IOM_PHY0_DDRPHY_FIR_REG, MCA_IOM_PHY0_DDRPHY_FIR_MASK_REG},
+};
+
+///
+/// @brief Checks whether any of the PLL unlock values are set
+/// @param[in] i_local_fir - the overall FIR register (pll_fir passes the PERV_TP_LOCAL_FIR contents)
+/// @param[in] i_perv_fir - the pervasive PLL FIR (pll_fir passes the PERV_TP_ERROR_REG contents)
+/// @param[in] i_mc_fir - the memory controller FIR (pll_fir passes the PERV_MC01_ERROR_REG contents)
+/// @return true iff the local FIR bit is set and the MC or TP PLL unlock bits are set, false otherwise
+///
+bool pll_unlock( const fapi2::buffer<uint64_t>& i_local_fir,
+ const fapi2::buffer<uint64_t>& i_perv_fir,
+ const fapi2::buffer<uint64_t>& i_mc_fir )
+{
+ // Note: the following registers did not have the scom fields defined, so we're constexpr'ing them here
+ constexpr uint64_t PERV_TP_ERROR_START = 25; // first bit of the TP PLL unlock field checked below
+ constexpr uint64_t PERV_TP_ERROR_LEN = 4; // number of TP PLL unlock bits checked below
+ constexpr uint64_t PERV_MC_ERROR_START = 25; // the single MC PLL unlock bit checked below
+
+ // The overall FIR (bit 21) was not set, so no PLL error can be reported - just exit
+ if(!i_local_fir.getBit<PERV_1_LOCAL_FIR_IN21>())
+ {
+ FAPI_INF("Did not have the PERV_LOCAL_FIR bit set. No PLL error, exiting");
+ return false;
+ }
+
+ // Now, identify whether a PLL unlock caused the FIR bit to fail - trace the state of both error registers
+ FAPI_INF("PERV_TP_ERROR_REG %s PERV_MC01_ERROR_REG %s",
+ i_perv_fir.getBit<PERV_TP_ERROR_START, PERV_TP_ERROR_LEN>() ? "PLL lock fail" : "PLL ok",
+ i_mc_fir.getBit<PERV_MC_ERROR_START>() ? "PLL lock fail" : "PLL ok");
+
+ // We have a PLL unlock if the MC PLL unlock FIR bit is on or any of the TP PLL unlock bits are on
+ return (i_mc_fir.getBit<PERV_MC_ERROR_START>()) || (i_perv_fir.getBit<PERV_TP_ERROR_START, PERV_TP_ERROR_LEN>());
+}
+
+///
+/// @brief Checks whether any PLL FIRs have been set on a target
+/// @param[in] i_target - the MCBIST target on which to operate
+/// @param[out] o_fir_error - true iff pll_unlock reports a PLL unlock; initialized to false before any register read
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+fapi2::ReturnCode pll_fir( const fapi2::Target<fapi2::TARGET_TYPE_MCBIST>& i_target, bool& o_fir_error )
+{
+ // Sets o_fir_error to false to begin with, just in case we have scom issues
+ o_fir_error = false;
+
+ // Gets the processor target - two of the three registers below are read from it
+ const auto& l_proc = mss::find_target<fapi2::TARGET_TYPE_PROC_CHIP>(i_target);
+
+ // Buffers for the register data
+ fapi2::buffer<uint64_t> l_local_fir;
+ fapi2::buffer<uint64_t> l_perv_fir;
+ fapi2::buffer<uint64_t> l_mc_fir;
+
+ FAPI_TRY(mss::getScom(l_proc, PERV_TP_LOCAL_FIR, l_local_fir), "%s failed to get 0x%016llx", mss::c_str(i_target),
+ PERV_TP_LOCAL_FIR);
+ FAPI_TRY(mss::getScom(l_proc, PERV_TP_ERROR_REG, l_perv_fir), "%s failed to get 0x%016llx", mss::c_str(i_target),
+ PERV_TP_ERROR_REG);
+ FAPI_TRY(mss::getScom(i_target, PERV_MC01_ERROR_REG, l_mc_fir), "%s failed to get 0x%016llx", mss::c_str(i_target),
+ PERV_MC01_ERROR_REG); // note: this one is read from the MCBIST target, not the processor
+
+ // Checks the data - pll_unlock decides whether the three registers indicate a PLL unlock
+ o_fir_error = pll_unlock(l_local_fir, l_perv_fir, l_mc_fir);
+
+fapi_try_exit:
return fapi2::current_err;
}
+///
+/// @brief Checks the MCBIST FIRs, then the FIRs of every MCA under the MCBIST, then the PLL unlock status
+/// @param[in] i_target - the target on which to operate - MCBIST specialization
+/// @param[out] o_fir_error - true iff a FIR was hit; the checks stop at the first hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<fapi2::TARGET_TYPE_MCBIST>& i_target, bool& o_fir_error )
+{
+ // Start by assuming we do not have a FIR
+ o_fir_error = false;
+
+ // Loop over the MCBIST FIR/mask pairs, read the scoms, and check the FIR
+ // Note: we return out if any FIR is bad
+ for(const auto& l_fir_reg : MCBIST_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+
+ // Loop through all MCA's and all MCA FIR's
+ for(const auto& l_mca : mss::find_targets<fapi2::TARGET_TYPE_MCA>(i_target))
+ {
+ for(const auto& l_fir_reg : MCA_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(l_mca, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+ }
+
+ // Lastly, check for PLL unlocks - only reached when none of the FIR registers above had a hit
+ FAPI_TRY(pll_fir(i_target, o_fir_error));
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+
+///
+/// @brief Checks the parent MCBIST FIRs, then this MCA's FIRs, then the PLL unlock status
+/// @param[in] i_target - the target on which to operate - MCA specialization
+/// @param[out] o_fir_error - true iff a FIR was hit; the checks stop at the first hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target, bool& o_fir_error )
+{
+ const auto& l_mcbist = mss::find_target<fapi2::TARGET_TYPE_MCBIST>(i_target);
+ // Start by assuming we do not have a FIR
+ o_fir_error = false;
+
+ // Loop over the MCBIST FIR/mask pairs on the parent MCBIST, read the scoms, and check the FIR
+ // Note: we return out if any FIR is bad
+ for(const auto& l_fir_reg : MCBIST_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(l_mcbist, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+
+ // Loop through all MCA FIR's - only for the MCA passed in, not its siblings
+ for(const auto& l_fir_reg : MCA_FIR_REGS)
+ {
+ FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error));
+
+ // Exit if we found a FIR
+ if(o_fir_error)
+ {
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+ }
+
+ // Lastly, check for PLL unlocks - pll_fir operates on the parent MCBIST
+ FAPI_TRY(pll_fir(l_mcbist, o_fir_error));
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+
+///
+/// @brief Checks whether any FIR have lit up by delegating to the check for the DIMM's MCA
+/// @param[in] i_target - the target on which to operate - DIMM specialization
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, bool& o_fir_error )
+{
+ const auto l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target); // the MCA found from this DIMM
+ return bad_fir_bits(l_mca, o_fir_error);
+}
+
}
}
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
index ded638e49..fc82aaed1 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
@@ -27,7 +27,7 @@
/// @file check.H
/// @brief Subroutines for checking MSS FIR
///
-// *HWP HWP Owner: Brian Silver <bsilver@us.ibm.com>
+// *HWP HWP Owner: Andre Marin <aamarin@us.ibm.com>
// *HWP HWP Backup: Marc Gollub <gollub@us.ibm.com>
// *HWP Team: Memory
// *HWP Level: 2
@@ -37,6 +37,7 @@
#define _MSS_CHECK_FIR_H_
#include <fapi2.H>
+#include <generic/memory/lib/utils/scom.H>
namespace mss
{
@@ -58,6 +59,7 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target<T>& i_target );
///
/// @brief Check FIR bits during draminit training
+/// @tparam T the fapi2::TargetType which hold the FIR bits
/// @param[in] i_target the dimm that was trained
/// @note We check for fir errors after training each rank
/// to see if there was a problem with the engine
@@ -69,6 +71,149 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target<T>& i_target );
template< fapi2::TargetType T >
fapi2::ReturnCode during_draminit_training( const fapi2::Target<T>& i_target );
+///
+/// @brief Checks whether any of the PLL unlock values are set
+/// @param[in] i_local_fir - the overall FIR register
+/// @param[in] i_perv_fir - the pervasive PLL FIR
+/// @param[in] i_mc_fir - the memory controller FIR
+/// @return true iff a PLL unlock is indicated by the passed in register values, false otherwise
+///
+bool pll_unlock( const fapi2::buffer<uint64_t>& i_local_fir,
+ const fapi2::buffer<uint64_t>& i_perv_fir,
+ const fapi2::buffer<uint64_t>& i_mc_fir );
+
+///
+/// @brief Checks whether any PLL FIRs have been set on a target
+/// @param[in] i_target - the MCBIST target on which to operate
+/// @param[out] o_fir_error - true iff a PLL unlock was found
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+fapi2::ReturnCode pll_fir( const fapi2::Target<fapi2::TARGET_TYPE_MCBIST>& i_target, bool& o_fir_error );
+
+///
+/// @brief Checks whether any FIRs have lit up on a target
+/// @tparam T the fapi2::TargetType which hold the FIR bits - check.C specializes MCBIST, MCA and DIMM
+/// @param[in] i_target - the target on which to operate
+/// @param[out] o_fir_error - true iff a FIR was hit
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode bad_fir_bits( const fapi2::Target<T>& i_target, bool& o_fir_error );
+
+///
+/// @brief Checks whether the passed in FIRs have any un-masked errors set
+/// @tparam T the fapi2::TargetType which hold the FIR bits
+/// @param[in] i_target - the target on which to operate
+/// @param[in] i_fir_regs - FIR register address (first) and mask register address (second)
+/// @param[out] o_fir_error - true iff a FIR bit is set whose mask bit is 0; only written once both reads succeed
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< fapi2::TargetType T >
+inline fapi2::ReturnCode fir_with_mask( const fapi2::Target<T>& i_target,
+ const std::pair<uint64_t, uint64_t>& i_fir_regs,
+ bool& o_fir_error )
+{
+ // Temporary variables to make the code a bit more readable
+ const auto FIR_REG = i_fir_regs.first;
+ const auto FIR_MASK = i_fir_regs.second;
+
+ fapi2::buffer<uint64_t> l_fir;
+ fapi2::buffer<uint64_t> l_fir_mask;
+
+ // Read the FIR register and its mask register
+ FAPI_TRY(mss::getScom(i_target, FIR_REG, l_fir));
+ FAPI_TRY(mss::getScom(i_target, FIR_MASK, l_fir_mask));
+
+
+ // The mask register will need to be inverted as a 0 in the mask register means the FIR is legit
+ // A bitwise and works the opposite way, so after the invert a 1 marks an un-masked bit
+ l_fir_mask.invert();
+
+ // If we have any unmasked bit, set that we have a FIR error and exit out with success
+ // Note: we want to set success here as PRD will find the FIR as "new" and retrigger the procedure this way
+ o_fir_error = ((l_fir & l_fir_mask) != 0);
+
+ // And print the information for debuggability
+ FAPI_INF("%s %s on reg 0x%016lx value 0x%016lx and mask 0x%016lx value 0x%016lx", mss::c_str(i_target),
+ o_fir_error ? "has FIR's set" : "has no FIR's set", FIR_REG, l_fir, FIR_MASK, l_fir_mask.invert()); // second invert() flips the mask back so the value as read is traced
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure
+/// @tparam T the fapi2::TargetType which hold the FIR bits
+/// @param[in] i_target - the target on which to operate
+/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue
+/// @return FAPI2_RC_SUCCESS if i_rc is good or a FIR was found (i_rc is then logged as recovered), otherwise i_rc
+/// @note This is a helper function to enable unit testing. It also overwrites fapi2::current_err on the failing path
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode hostboot_fir_or_pll_fail( const fapi2::Target<T>& i_target, fapi2::ReturnCode& i_rc)
+{
+ // We didn't have an error, so return success without reading any FIR
+ if(i_rc == fapi2::FAPI2_RC_SUCCESS)
+ {
+ FAPI_INF("%s has a good return code, returning success", mss::c_str(i_target));
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+
+ fapi2::ReturnCode l_fircheck_scom_err(fapi2::FAPI2_RC_SUCCESS);
+ bool l_fir_error = false;
+
+ FAPI_ERR("%s has a bad return code, time to check some firs!", mss::c_str(i_target));
+
+ l_fircheck_scom_err = bad_fir_bits(i_target, l_fir_error); // NOTE(review): l_fircheck_scom_err is never read afterwards - confirm that ignoring a failed FIR read is intended
+
+ FAPI_ERR("%s took a fail. FIR was %s", mss::c_str(i_target),
+ l_fir_error ? "set - returning FIR RC" : "unset - returning inputted RC");
+
+ // If we had a FIR error, log the original error as recovered and return success
+ // PRD will handle the original error
+ if(l_fir_error)
+ {
+ fapi2::log_related_error(i_target, i_rc, fapi2::FAPI2_ERRL_SEV_RECOVERED);
+ fapi2::current_err = fapi2::FAPI2_RC_SUCCESS;
+ }
+ else
+ {
+ fapi2::current_err = i_rc; // no FIR found, so the original failure is passed back unchanged
+ }
+
+ return fapi2::current_err;
+}
+
+///
+/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure, if a check fir boolean is passed in
+/// @tparam T the fapi2::TargetType which hold the FIR bits
+/// @param[in] i_target - the target on which to operate
+/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue
+/// @param[in] i_check_fir - true IFF the FIR needs to be checked - defaults to true; only used when __HOSTBOOT_MODULE is defined
+/// @return with __HOSTBOOT_MODULE: i_rc, or the hostboot_fir_or_pll_fail result if i_check_fir; otherwise i_rc unchanged
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode fir_or_pll_fail( const fapi2::Target<T>& i_target, fapi2::ReturnCode& i_rc,
+ const bool i_check_fir = true)
+{
+#ifdef __HOSTBOOT_MODULE
+
+ fapi2::ReturnCode l_rc(i_rc); // work on a copy of the caller's return code
+
+ // If need be, check the FIR below
+ if(i_check_fir)
+ {
+ // Handle any issues according to PRD FIR scheme, as a FIR could have caused this issue
+ l_rc = hostboot_fir_or_pll_fail(i_target, l_rc);
+ }
+
+ return l_rc;
+
+#else
+ return i_rc; // no FIR handling in this build, the caller's return code is passed straight through
+#endif
+}
+
}
}
#endif
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
index 876a83909..b6c2ece01 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
@@ -964,7 +964,7 @@ fapi2::ReturnCode reset_zqcal_config( const fapi2::Target<T>& i_target )
for (const auto r : l_ranks)
{
- l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + rank::map_rank_ordinal_to_phy(i_target, r));
+ FAPI_TRY(l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + rank::map_rank_ordinal_to_phy(i_target, r)));
}
// Write the ZQCAL periodic config
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C
index 17563fc83..bdee48e3c 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C
@@ -936,7 +936,7 @@ fapi2::ReturnCode xlate_dimm_2R2T8Gbx4( const dimm::kind& i_kind,
// We're basically a 2R 4Gbx4 with an extra row. So lets setup like we're one of those,
// add row 16 and shift the D bit as needed.
- xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2);
+ FAPI_TRY(xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2));
// Tell the MC which of the row bits are valid, and map the DIMM selector
// We're a 17 row DIMM, so ROW16 is valid.
@@ -1941,7 +1941,7 @@ fapi2::ReturnCode setup_xlate_map_helper( std::vector<dimm::kind>& io_dimm_kinds
set_DIMM_TYPE(k.iv_dimm_type).
set_ROWS(k.iv_rows).
set_SIZE(k.iv_size),
- "no address translation funtion for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d",
+ "no address translation function for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d",
mss::c_str(k.iv_target),
k.iv_master_ranks,
k.iv_total_ranks,
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
index 86a8621fa..e1e63fec5 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
@@ -521,6 +521,11 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_DI
uint64_t l_rank_pairs = 0;
uint8_t cal_abort_on_error = 0;
+ // This boolean tells the code whether we took a training fail or a scom fail reading the status registers
+ // It starts as false, given that we need to read out the registers
+ // When we start checking all of the values of the status registers, it gets set to true
+ bool l_check_firs = false;
+
const auto& l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target);
fapi2::buffer<uint64_t> l_err_data;
@@ -550,6 +555,9 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_DI
}
// Error information from other registers is gathered in the FFDC from the XML
+ // From here on out, check the FIRs
+ // Using this boolean to avoid having to check the FIR's after each assert below
+ l_check_firs = true;
// So we can do a few things here. If we're aborting on the first calibration error,
// we only expect to have one error bit set. If we ran all the calibrations, we can
@@ -692,7 +700,8 @@ fapi_try_exit:
(fapi2::current_err == fapi2::FAPI2_RC_SUCCESS ? "success" : "errors reported"),
mss::c_str(l_mca));
- return fapi2::current_err;
+ // Checks the FIR's, if need be
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_check_firs);
}
///
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
index 0e346881a..129c37515 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C
@@ -52,6 +52,7 @@
#include <generic/memory/lib/utils/c_str.H>
#include <lib/workarounds/dp16_workarounds.H>
+#include <lib/fir/check.H>
#include <generic/memory/lib/utils/mss_math.H>
using fapi2::TARGET_TYPE_MCS;
@@ -3260,6 +3261,22 @@ fapi_try_exit:
///
fapi2::ReturnCode record_bad_bits( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target )
{
+ // If we have a FIR set that could have caused our training fail, then skip checking bad bits in FW
+ // PRD will handle the FIR and retrigger the procedure
+#ifdef __HOSTBOOT_MODULE
+ bool l_fir_error = false;
+ FAPI_TRY(mss::check::bad_fir_bits(i_target, l_fir_error), "%s took an error while checking FIR's",
+ mss::c_str(i_target));
+
+ // Exit if we took a FIR error - PRD will handle bad bits
+ if(l_fir_error)
+ {
+ FAPI_INF("%s has FIR's set, exiting to let PRD handle it", mss::c_str(i_target));
+ return fapi2::FAPI2_RC_SUCCESS;
+ }
+
+#endif
+
for( const auto& d : mss::find_targets<fapi2::TARGET_TYPE_DIMM>(i_target) )
{
uint8_t l_data[MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT] = {};
@@ -3367,11 +3384,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
size_t l_index = 0;
std::vector<fapi2::buffer<uint64_t>> l_data;
+ // Boolean to keep track of if a fail was calibration related, or scom related
+ bool l_cal_fail = false;
+
// Suck all the cal error bits out ...
FAPI_TRY( mss::scom_suckah(l_mca, TT::RD_VREF_CAL_ERROR_REG, l_data) );
FAPI_INF("%s Processing RD_VREF_CAL_ERROR", mss::c_str(i_target));
+ // From here on out, any fails are calibration fails rather than scom fails
+ l_cal_fail = true;
+
for (const auto& v : l_data)
{
// They should all be 0's. If they're not, we have a problem.
@@ -3383,14 +3406,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
.set_VALUE(v),
"DP16 failed read vref calibration on %s. register 0x%016lx value 0x%016lx",
mss::c_str(l_mca), TT::RD_VREF_CAL_ERROR_REG[l_index], v);
+
++l_index;
}
- FAPI_INF("RD_VREF_CAL_ERROR complete");
+ FAPI_INF("%s RD_VREF_CAL_ERROR complete", mss::c_str(i_target));
return fapi2::FAPI2_RC_SUCCESS;
fapi_try_exit:
- return fapi2::current_err;
+
+ // If the fail was calibration related, then check to see if FIRs or PLL fails were the cause
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_cal_fail);
}
///
@@ -3412,10 +3438,16 @@ fapi2::ReturnCode process_wrvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
std::vector<std::pair<fapi2::buffer<uint64_t>, fapi2::buffer<uint64_t>>> l_data;
std::vector<std::pair<fapi2::buffer<uint64_t>, fapi2::buffer<uint64_t>>> l_mask;
+ // Boolean to keep track of if a fail was calibration related, or scom related
+ bool l_cal_fail = false;
+
// Suck all the cal error bits out ...
FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_REG, l_data) );
FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_MASK_REG, l_mask) );
+ // From here on out, any fails are calibration fails rather than scom fails
+ l_cal_fail = true;
+
// Loop through both data and mask
{
// Note: ideally these would be cbegin/cend, but HB doesn't support constant iterators for vectors
@@ -3480,11 +3512,13 @@ fapi2::ReturnCode process_wrvref_cal_errors( const fapi2::Target<fapi2::TARGET_T
}
}
- FAPI_INF("WRVREF_CAL_ERROR complete");
+ FAPI_INF("%s WRVREF_CAL_ERROR complete", mss::c_str(i_target));
return fapi2::FAPI2_RC_SUCCESS;
fapi_try_exit:
- return fapi2::current_err;
+
+ // If the fail was calibration related, then check to see if FIR's were the cause
+ return mss::check::fir_or_pll_fail( i_target, fapi2::current_err, l_cal_fail);
}
///
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
index cf6a871e5..d6e5c4f53 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
@@ -199,6 +199,7 @@ enum ffdc_functions
RD_CTR_WORKAROUND_READ_DATA = 7,
OVERRIDE_ODT_WR_CONFIG = 8,
RECORD_BAD_BITS_HELPER = 9,
+ SET_PAIR_VALID = 10,
};
// Static consts describing the bits used in the cal_step_enable attribute
// These are bit positions. 0 is the left most bit.
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C b/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C
index 95dbe26f8..a5fa507bb 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dp16_workarounds.C
@@ -47,6 +47,7 @@
#include <lib/phy/phy_cntrl.H>
#include <lib/dimm/rank.H>
#include <lib/utils/bit_count.H>
+#include <lib/fir/check.H>
namespace mss
{
@@ -547,10 +548,13 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target<fapi2::TARGET_TYPE_MC
// If we can't, exit with success
if (! chip_ec_feature_mss_dqs_workaround(i_target) )
{
- FAPI_DBG("Skipping DQS workaround because of ec feature attribute");
+ FAPI_DBG("%s Skipping DQS workaround because of ec feature attribute", mss::c_str(i_target));
return fapi2::FAPI2_RC_SUCCESS;
}
+ // Boolean to keep track of if a fail was calibration related, or scom related
+ bool l_cal_fail = false;
+
FAPI_TRY( eff_dram_width( i_target, l_dram_width) );
l_is_x8 = ((l_dram_width[0] == fapi2::ENUM_ATTR_EFF_DRAM_WIDTH_X8) ||
@@ -603,6 +607,8 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target<fapi2::TARGET_TYPE_MC
// Clear all disable bits - this will cause calibration to re-run everything that failed, including WR LVL fails
FAPI_TRY(mss::workarounds::dp16::dqs_align::reset_disables(i_target, i_rp));
+ // Next, we're checking for CAL fails, so make sure to check the FIR's below
+ l_cal_fail = true;
// If the loop timed out, bomb out
// If this is firmware, they'll log it as info and run to memdiags
@@ -617,11 +623,16 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target<fapi2::TARGET_TYPE_MC
"%s i_rp %lu DQS workaround failed! 10 loops reached without everything passing",
mss::c_str(i_target), i_rp);
+ // Below, the errors are scom related, no need to check the FIR's
+ l_cal_fail = false;
+
// Now plop the delays back in to the registers
FAPI_TRY(mss::workarounds::dp16::dqs_align::set_passing_values( i_target, i_rp, l_passing_values));
fapi_try_exit:
- return fapi2::current_err;
+
+ // If the fail was calibration related, then check to see if FIR's or PLL's could be the cause
+ return mss::check::fir_or_pll_fail(i_target, fapi2::current_err, l_cal_fail);
}
///
@@ -777,7 +788,8 @@ fapi_try_exit:
/// @param[in,out] io_passing_values - the passing values, a map from the DQS number to the value
/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if ok
///
-fapi2::ReturnCode record_passing_values( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target, const uint64_t i_rp,
+fapi2::ReturnCode record_passing_values( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target,
+ const uint64_t i_rp,
std::map<uint64_t, uint64_t>& io_passing_values)
{
// Traits declaration
diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
index 533a53905..b4de8bd90 100644
--- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
+++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C
@@ -68,7 +68,7 @@ extern "C"
std::vector<fapi2::ReturnCode> l_fails;
- FAPI_INF("Start draminit training");
+ FAPI_INF("%s Start draminit training", mss::c_str(i_target));
// If there are no DIMM we don't need to bother. In fact, we can't as we didn't setup
// attributes for the PHY, etc.
OpenPOWER on IntegriCloud