diff options
author | Jacob Harvey <jlharvey@us.ibm.com> | 2017-08-31 17:54:14 -0500 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2017-09-07 14:41:06 -0400 |
commit | f18a5784ca82e3e44572fa55ac36673736d9eb66 (patch) | |
tree | d987fb5a85eae0919f98874905a3bea8916c624c /src | |
parent | 241d612d4398d7cf38274e812f84f0b4b9d9b4cb (diff) | |
download | talos-hostboot-f18a5784ca82e3e44572fa55ac36673736d9eb66.tar.gz talos-hostboot-f18a5784ca82e3e44572fa55ac36673736d9eb66.zip |
Add FIR checking to training error checking
Change-Id: I7de696f3724a3a3b10650790d481ecb1130d6d7c
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45580
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com>
Reviewed-by: STEPHEN GLANCY <sglancy@us.ibm.com>
Reviewed-by: ANDRE A. MARIN <aamarin@us.ibm.com>
Reviewed-by: Jennifer A. Stofer <stofer@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45583
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src')
4 files changed, 112 insertions, 3 deletions
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
index ed1cbb30b..e0615943f 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C
@@ -181,6 +181,52 @@ fapi_try_exit:
 return fapi2::current_err;
 }
+///
+/// @brief Check FIR bits during draminit training
+/// @param[in] i_target the dimm that was trained
+/// @note We check for fir errors after training each rank
+/// to see if there was a problem with the engine.
+/// FFDC errors returned from this will be handled similar to other training errors:
+/// Logged as informational if it affects less than a nibble and a bit.
+/// Reported if it affects more than that
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template<>
+fapi2::ReturnCode during_draminit_training( const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target)
+{
+ const auto& l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target);
+
+ // Creating a mask to check for FIR errors.
+ // These are DP16 parity errors that would be triggered in case of a general PHY error
+ // During draminit_training, this would mean a training error dealing with the PHY
+ fapi2::buffer<uint64_t> l_phyfir_mask;
+ l_phyfir_mask.setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_0>()
+ .setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_1>()
+ .setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_2>()
+ .setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_3>()
+ .setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_4>()
+ .setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_5>()
+ .setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_6>()
+ .setBit<MCA_IOM_PHY0_DDRPHY_FIR_REG_ERROR_7>();
+
+ fapi2::buffer<uint64_t> l_phyfir_data;
+ fapi2::buffer<uint64_t> l_phyfir_masked;
+
+ FAPI_TRY( mss::getScom(l_mca, MCA_IOM_PHY0_DDRPHY_FIR_REG, l_phyfir_data) );
+
+ l_phyfir_masked = l_phyfir_data & l_phyfir_mask;
+
+ FAPI_ASSERT( l_phyfir_masked == 0,
+ fapi2::MSS_DRAMINIT_TRAINING_PORT_FIR()
+ .set_PHY_FIR(l_phyfir_masked)
+ .set_DIMM_TARGET(i_target)
+ .set_MCA_TARGET(l_mca),
+ "Initial CAL failed: Reporting FIR bits set for %s ( phy: 0x%016lx",
+ mss::c_str(i_target), l_phyfir_masked);
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
 }
 }
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
index 27f884940..ded638e49 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H
@@ -5,7 +5,7 @@
 /* */
 /* OpenPOWER HostBoot Project */
 /* */
-/* Contributors Listed Below - COPYRIGHT 2016 */
+/* Contributors Listed Below - COPYRIGHT 2016,2017 */
 /* [+] International Business Machines Corp. */
 /* */
 /* */
@@ -56,6 +56,19 @@ namespace check
 template< fapi2::TargetType T >
 fapi2::ReturnCode during_phy_reset( const fapi2::Target<T>& i_target );
+///
+/// @brief Check FIR bits during draminit training
+/// @param[in] i_target the dimm that was trained
+/// @note We check for fir errors after training each rank
+/// to see if there was a problem with the engine
+/// FFDC errors return from this will be handle similar to other training errors
+/// Logged if it affects less than a nibble and a bit.
+/// Reported if it affects more than that
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode during_draminit_training( const fapi2::Target<T>& i_target );
+
 }
 }
 #endif
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
index 4aa6a48bd..ca275de9c 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C
@@ -46,6 +46,7 @@
 #include <lib/phy/adr32s.H>
 #include <lib/phy/adr.H>
 #include <lib/phy/seq.H>
+#include <lib/fir/check.H>
 #include <lib/workarounds/dp16_workarounds.H>
 #include <lib/workarounds/wr_vref_workarounds.H>
 #include <lib/dimm/ddr4/latch_wr_vref.H>
@@ -540,7 +541,11 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_DI
 if ((l_rank_pairs == 0) || (l_errors == 0))
 {
- FAPI_INF("Initial cal - no errors reported %s", mss::c_str(l_mca));
+ // If we got here, we check the phy firs to see if the engine had a problem
+ // If there's no FIRs lit up, we return SUCCESS
+ // If there's a FIR, we return a general error that will trigger a BAD_DQ check by the calling function
+ FAPI_TRY( mss::check::during_draminit_training(i_target) );
+ FAPI_INF("Initial cal success %s", mss::c_str(l_mca));
 return fapi2::FAPI2_RC_SUCCESS;
 }
@@ -683,6 +688,10 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target<TARGET_TYPE_DI
 );
 fapi_try_exit:
+ FAPI_INF("Initial cal - %s %s",
+ (fapi2::current_err == fapi2::FAPI2_RC_SUCCESS ? "success" : "errors reported"),
+ mss::c_str(l_mca));
+
 return fapi2::current_err;
 }
@@ -750,8 +759,9 @@ fapi2::ReturnCode find_and_log_cal_errors(const fapi2::Target<fapi2::TARGET_TYPE
 }
 }
- FAPI_ERR("Seeing calibration errors for p9_mss_draminit_training %s: Keep running? %s",
+ FAPI_ERR("Seeing calibration errors for p9_mss_draminit_training %s rp %d: Keep running? %s",
 mss::c_str(l_dimm),
+ i_rp,
 (l_rc == fapi2::FAPI2_RC_SUCCESS) ? "Yes" : "no");
 // Let's update the attribute with the failing DQ bits since we had a training error
diff --git a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml
index 64a5d3d74..51286282a 100644
--- a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml
+++ b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml
@@ -98,6 +98,46 @@
 </registerFfdc>
 <hwpError>
+ <rc>RC_MSS_DRAMINIT_TRAINING_PORT_FIR</rc>
+ <description>
+ A PHY fir was lit up due to draminit training.
+ There could be a problem with the training engine
+ Checking fir bits 1-7
+ </description>
+ <ffdc>PHY_FIR</ffdc>
+ <ffdc>DIMM_TARGET</ffdc>
+ <collectRegisterFfdc>
+ <id>REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_INFO</id>
+ <target>MCA_TARGET</target>
+ <targetType>TARGET_TYPE_MCA</targetType>
+ </collectRegisterFfdc>
+ <collectRegisterFfdc>
+ <id>REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS</id>
+ <target>MCA_TARGET</target>
+ <targetType>TARGET_TYPE_MCA</targetType>
+ </collectRegisterFfdc>
+ <collectRegisterFfdc>
+ <id>REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS</id>
+ <target>MCA_TARGET</target>
+ <targetType>TARGET_TYPE_MCA</targetType>
+ </collectRegisterFfdc>
+ <callout>
+ <target>DIMM_TARGET</target>
+ <priority>HIGH</priority>
+ </callout>
+ <deconfigure>
+ <target>DIMM_TARGET</target>
+ </deconfigure>
+ <gard>
+ <target>DIMM_TARGET</target>
+ </gard>
+ <callout>
+ <procedure>CODE</procedure>
+ <priority>MEDIUM</priority>
+ </callout>
+</hwpError>
+
+<hwpError>
 <rc>RC_MSS_DRAMINIT_TRAINING_MULTIPLE_ERRORS</rc>
 <description>Multiple training steps failed for a given position within this calibration.</description>
 <ffdc>FAILED_STEPS</ffdc> |