From e6bca1c97afaea818b8f41a3e39d0eb25910219d Mon Sep 17 00:00:00 2001 From: Brian Silver Date: Thu, 27 Oct 2016 15:57:15 -0500 Subject: Change bad bit processing to process bad bit attributes Better process rank-pairs rather than per-DP Change-Id: Iab8e21a934368fcf201f0e7b91aa8b859b3b0e47 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/31926 Reviewed-by: STEPHEN GLANCY Dev-Ready: Brian R. Silver Tested-by: Jenkins Server Tested-by: Hostboot CI Reviewed-by: ANDRE A. MARIN Reviewed-by: Brian R. Silver Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/31928 Tested-by: Jenkins OP Build CI Tested-by: FSP CI Jenkins Reviewed-by: Christian R. Geddes --- .../hwp/memory/lib/mss_attribute_accessors.H | 99 +++++++ .../p9/procedures/hwp/memory/lib/phy/ddr_phy.C | 40 ++- .../chips/p9/procedures/hwp/memory/lib/phy/dp16.C | 330 ++++++++++++++++----- .../chips/p9/procedures/hwp/memory/lib/phy/dp16.H | 58 ++-- .../procedures/hwp/memory/lib/utils/conversions.H | 4 +- .../hwp/memory/p9_mss_draminit_training.C | 21 ++ .../xml/attribute_info/memory_dq_attributes.xml | 49 ++- .../error_info/p9_memory_mss_draminit_training.xml | 48 +-- 8 files changed, 509 insertions(+), 140 deletions(-) (limited to 'src/import') diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mss_attribute_accessors.H b/src/import/chips/p9/procedures/hwp/memory/lib/mss_attribute_accessors.H index 0a1b3c896..e1cb7e4ac 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mss_attribute_accessors.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mss_attribute_accessors.H @@ -26029,6 +26029,105 @@ fapi_try_exit: return fapi2::current_err; } +/// +/// @brief ATTR_BAD_DQ_BITMAP getter +/// @param[in] const ref to the fapi2::Target +/// @param[out] uint8_t* memory to store the value +/// @note Generated by gen_accessors.pl generateParameters (A) +/// @return fapi2::ReturnCode - FAPI2_RC_SUCCESS iff get is OK +/// @note Bad DQ bitmap from a controller point of view. The data is a 10 byte bitmap for +/// each of 4 possible ranks. The bad DQ data is stored in NVRAM, and it is stored +/// in a special format translated to a DIMM Connector point of view. All of these +/// details are hidden from the user of this +/// attribute. +/// +inline fapi2::ReturnCode bad_dq_bitmap(const fapi2::Target& i_target, uint8_t* o_array) +{ + if (o_array == nullptr) + { + FAPI_ERR("nullptr passed to attribute accessor %s", __func__); + return fapi2::FAPI2_RC_INVALID_PARAMETER; + } + + uint8_t l_value[2][2][4][10]; + auto l_mca = i_target.getParent(); + auto l_mcs = l_mca.getParent(); + + FAPI_TRY( FAPI_ATTR_GET(fapi2::ATTR_BAD_DQ_BITMAP, l_mcs, l_value) ); + memcpy(o_array, &(l_value[mss::index(l_mca)][mss::index(i_target)][0]), 40); + return fapi2::current_err; + +fapi_try_exit: + FAPI_ERR("failed accessing ATTR_BAD_DQ_BITMAP: 0x%lx (target: %s)", + uint64_t(fapi2::current_err), mss::c_str(i_target)); + return fapi2::current_err; +} + +/// +/// @brief ATTR_BAD_DQ_BITMAP getter +/// @param[in] const ref to the fapi2::Target +/// @param[out] uint8_t* memory to store the value +/// @note Generated by gen_accessors.pl generateParameters (B) +/// @return fapi2::ReturnCode - FAPI2_RC_SUCCESS iff get is OK +/// @note Bad DQ bitmap from a controller point of view. The data is a 10 byte bitmap for +/// each of 4 possible ranks. The bad DQ data is stored in NVRAM, and it is stored +/// in a special format translated to a DIMM Connector point of view. All of these +/// details are hidden from the user of this +/// attribute. +/// +inline fapi2::ReturnCode bad_dq_bitmap(const fapi2::Target& i_target, uint8_t* o_array) +{ + if (o_array == nullptr) + { + FAPI_ERR("nullptr passed to attribute accessor %s", __func__); + return fapi2::FAPI2_RC_INVALID_PARAMETER; + } + + uint8_t l_value[2][2][4][10]; + auto l_mcs = i_target.getParent(); + + FAPI_TRY( FAPI_ATTR_GET(fapi2::ATTR_BAD_DQ_BITMAP, l_mcs, l_value) ); + memcpy(o_array, &(l_value[mss::index(i_target)][0]), 80); + return fapi2::current_err; + +fapi_try_exit: + FAPI_ERR("failed accessing ATTR_BAD_DQ_BITMAP: 0x%lx (target: %s)", + uint64_t(fapi2::current_err), mss::c_str(i_target)); + return fapi2::current_err; +} + +/// +/// @brief ATTR_BAD_DQ_BITMAP getter +/// @param[in] const ref to the fapi2::Target +/// @param[out] uint8_t* memory to store the value +/// @note Generated by gen_accessors.pl generateParameters (C) +/// @return fapi2::ReturnCode - FAPI2_RC_SUCCESS iff get is OK +/// @note Bad DQ bitmap from a controller point of view. The data is a 10 byte bitmap for +/// each of 4 possible ranks. The bad DQ data is stored in NVRAM, and it is stored +/// in a special format translated to a DIMM Connector point of view. All of these +/// details are hidden from the user of this +/// attribute. +/// +inline fapi2::ReturnCode bad_dq_bitmap(const fapi2::Target& i_target, uint8_t* o_array) +{ + if (o_array == nullptr) + { + FAPI_ERR("nullptr passed to attribute accessor %s", __func__); + return fapi2::FAPI2_RC_INVALID_PARAMETER; + } + + uint8_t l_value[2][2][4][10]; + + FAPI_TRY( FAPI_ATTR_GET(fapi2::ATTR_BAD_DQ_BITMAP, i_target, l_value) ); + memcpy(o_array, &l_value, 160); + return fapi2::current_err; + +fapi_try_exit: + FAPI_ERR("failed accessing ATTR_BAD_DQ_BITMAP: 0x%lx (target: %s)", + uint64_t(fapi2::current_err), mss::c_str(i_target)); + return fapi2::current_err; +} + /// /// @brief ATTR_FREQ_PROC_REFCLOCK_KHZ getter diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C index e59742159..1b9dedada 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C @@ -540,7 +540,8 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target& i_target // Section 5.2.4.3 DP16 Data Bit Disable 0 on page 288 // Section 5.2.4.4 DP16 Data Bit Disable 1 on page 289 FAPI_TRY( mss::dp16::reset_data_bit_enable(p) ); + + // Not going to load bad bits from the attributes until after f/w bring up +#ifdef LOAD_BAD_BITS_FROM_ATTR FAPI_TRY( mss::dp16::reset_bad_bits(p) ); +#endif // New for Nimbus reset the DLL FAPI_TRY( mss::dp16::reset_dll(p) ); diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C index ac2f4353a..1472d5426 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.C @@ -50,6 +50,7 @@ #include #include +using fapi2::TARGET_TYPE_MCS; using fapi2::TARGET_TYPE_MCA; using fapi2::TARGET_TYPE_DIMM; using fapi2::TARGET_TYPE_MCBIST; @@ -410,6 +411,39 @@ const std::vector dp16Traits::READ_DELAY_OFFSET_REG = MCA_DDRPHY_DP16_READ_DELAY_OFFSET1_RANK_PAIR3_P0_4, }; +// Definition of the DISABLE registers, per dp per rank pair +const std::vector< std::vector> > dp16Traits::BIT_DISABLE_REG = +{ + { + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_0, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_0 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_1, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_1 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_2, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_2 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_3, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_3 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_4, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_4 }, + }, + { + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP1_P0_0, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP1_P0_0 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP1_P0_1, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP1_P0_1 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP1_P0_2, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP1_P0_2 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP1_P0_3, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP1_P0_3 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP1_P0_4, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP1_P0_4 }, + }, + { + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP2_P0_0, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP2_P0_0 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP2_P0_1, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP2_P0_1 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP2_P0_2, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP2_P0_2 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP2_P0_3, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP2_P0_3 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP2_P0_4, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP2_P0_4 }, + }, + { + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP3_P0_0, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP3_P0_0 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP3_P0_1, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP3_P0_1 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP3_P0_2, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP3_P0_2 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP3_P0_3, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP3_P0_3 }, + { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP3_P0_4, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP3_P0_4 }, + }, +}; + /// /// @brief Given a RD_VREF value, create a PHY 'standard' bit field for that percentage. /// @tparam T fapi2 Target Type - derived @@ -1765,6 +1799,8 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ const fapi2::Target& i_dimm, const uint64_t l_rp ) { + typedef dp16Traits TT; + // In a x4 configuration, all bits in the disable registers are used. // Named like a local variable so it matches the x8 vector constexpr uint8_t l_x4_mask = 0b11111111; @@ -1856,8 +1892,6 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ }; // The field in the disable bit register address which specifies which rp the register is for. - constexpr uint64_t RP_START_BIT = 22; - constexpr uint64_t RP_LEN = 2; constexpr uint64_t RP_OFFSET = 60; // The DQS bits (disable 1) are left aligned in a 16 bit register and we have a @@ -1883,32 +1917,23 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ fapi2::buffer l_rpb(l_rp); std::vector< std::pair, fapi2::buffer > > l_read; - // A tasty vector of the disable bits for RP0. We'll add in the RP bits before we scom. - std::vector> l_addr = - { - { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_0, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_0 }, - { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_1, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_1 }, - { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_2, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_2 }, - { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_3, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_3 }, - { MCA_DDRPHY_DP16_DATA_BIT_DISABLE0_RP0_P0_4, MCA_DDRPHY_DP16_DATA_BIT_DISABLE1_RP0_P0_4 }, - }; - while (l_rpb != 0) { // We increment l_which_dp as soon as we enter the loop below uint64_t l_which_dp = ~0; + // A map of the indexes of the bad nibbles. We do bad nibble checking in two phases; + // the DQS and the DQ. Since bad bits in the DQ/DQS of the nibble are the same nibble, + // we use a map to consolodate the findings. In the end all we care about is whether there + // is more than one entry in this map. + std::map l_bad_nibbles; + uint64_t l_bad_bits = 0; + // Find the first bit set in the rank pairs - this will tell us which rank pair has a fail const auto l_fbs = mss::first_bit_set(uint64_t(l_rpb)) - RP_OFFSET; - // Fix up the vector so it grabs registers for this rank pair - for (auto& r : l_addr) - { - r.first = fapi2::buffer(r.first ).insertFromRight(l_fbs); - r.second = fapi2::buffer(r.second).insertFromRight(l_fbs); - - FAPI_INF("checking bad bits for RP%d (0x%016lX, 0x%016lX)", l_fbs, r.first, r.second); - } + const auto l_addr = TT::BIT_DISABLE_REG[l_fbs]; + FAPI_INF("checking bad bits for RP%d", l_fbs); FAPI_TRY( mss::scom_suckah(i_target, l_addr, l_read) ); @@ -1920,14 +1945,8 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ uint64_t l_dq_bad_bit_count = 0; uint64_t l_dqs_bad_bit_count = 0; - // A map of the indexes of the bad nibbles. We do bad nibble checking in two phases; - // the DQS and the DQ. Since bad bits in the DQ/DQS of the nibble are the same nibble, - // we use a map to consolodate the findings. In the end all we care about is whether there - // is more than one entry in this map. - std::map l_bad_nibbles; - uint64_t l_bad_bits = 0; - l_which_dp += 1; + const uint64_t l_which_nibble = l_which_dp * BITS_PER_NIBBLE; FAPI_INF("read disable0 0x%016lx disable1 0x%016lx", v.first, v.second); @@ -1942,7 +1961,8 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ // // Check for a simple fail. In all cases if we see 6 or more bits set in the - // disable 0, we have a dead nibble + more than one bit. + // disable 0, we have a dead nibble + more than one bit. We don't need to worry about + // disable bits because this port will be deconfigured all together. // l_dq_bad_bit_count = mss::bit_count(uint64_t(v.first)); FAPI_INF("bad DQ count for port %d DP%d %d", l_which_port, l_which_dp, l_dq_bad_bit_count); @@ -1950,7 +1970,6 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), "port %d DP%d too many bad DQ bits 0x%016lx", l_which_port, l_which_dp, v.first); - // // Find the DQS mask for this DP. // @@ -1988,13 +2007,13 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ // We shift it over to mask off the nibble we're checking const uint16_t l_dqs_nibble_mask = 0b1100000000000000 >> (n * BITS_PER_DQS); - FAPI_INF("port %d DP%d nibble %d mask: 0x%x dqs: 0x%x", - l_which_port, l_which_dp, n, l_dqs_nibble_mask, v.second); + FAPI_INF("port %d DP%d nibble %d (%d) mask: 0x%x dqs: 0x%x", + l_which_port, l_which_dp, n, n + l_which_nibble, l_dqs_nibble_mask, v.second); if ((l_dqs_nibble_mask & v.second) != 0) { FAPI_INF("dqs check indicating %d as a bad nibble", n); - l_bad_nibbles[n] = 1; + l_bad_nibbles[n + l_which_nibble] = 1; } } @@ -2003,7 +2022,7 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ // we're done. FAPI_ASSERT(l_bad_nibbles.size() <= MAX_BAD_NIBBLES, fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), - "port %d DP%d too many bad nibbles %d", l_which_port, l_which_dp, l_bad_nibbles.size()); + "port %d DP%d too many bad nibbles %d", l_which_port, l_which_nibble, l_bad_nibbles.size()); } // @@ -2024,8 +2043,8 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ // we add this bit to the total of bad singleton bits. const uint64_t l_bit_count = mss::bit_count(l_dq_nibble_mask & v.first); - FAPI_INF("port %d DP%d nibble %d mask: 0x%x dq: 0x%x c: %d", - l_which_port, l_which_dp, n, l_dq_nibble_mask, v.first, l_bit_count); + FAPI_INF("port %d DP%d nibble %d (%d) mask: 0x%x dq: 0x%x c: %d", + l_which_port, l_which_dp, n, n + l_which_nibble, l_dq_nibble_mask, v.first, l_bit_count); // If we don't have any set bits, we're good to go. If we have more than the max bad bits, @@ -2035,47 +2054,47 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& i_targ { if (l_bit_count > MAX_BAD_BITS) { - FAPI_INF("dq check indicating %d as a bad nibble", n); - l_bad_nibbles[n] = 1; + FAPI_INF("dq check indicating %d (%d) as a bad nibble", n, n + l_which_nibble); + l_bad_nibbles[n + l_which_nibble] = 1; } else { - FAPI_INF("dq check indicating nibble %d has a bad bit", n); + FAPI_INF("dq check indicating nibble %d (%d) has a bad bit", n, n + l_which_nibble); l_bad_bits += l_bit_count; } } } + } + } - // - // Ok, so now we know how many bad bits we have and how many bad nibbles. If we have more than - // one bad nibble, we're cooked. If we have one bad nibble and one bad bit, we're ok. Also, if - // we have no bad nibbles and two bad bits (a sly bad nibble) we are ok - one of those bad bits - // counts as a bad nibble. - // - FAPI_ASSERT(l_bad_nibbles.size() <= MAX_BAD_NIBBLES, - fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), - "port %d DP%d too many bad nibbles %d", - l_which_port, l_which_dp, l_bad_nibbles.size()); - - // If we have one bad nibble, assert that we have one or fewer bad bits - if (l_bad_nibbles.size() == MAX_BAD_NIBBLES) - { - FAPI_ASSERT(l_bad_bits <= MAX_BAD_BITS, - fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), - "port %d DP%d bad nibbles %d + %d bad bits", - l_which_port, l_which_dp, l_bad_nibbles.size(), l_bad_bits); - } + // + // Ok, so now we know how many bad bits we have and how many bad nibbles. If we have more than + // one bad nibble, we're cooked. If we have one bad nibble and one bad bit, we're ok. Also, if + // we have no bad nibbles and two bad bits (a sly bad nibble) we are ok - one of those bad bits + // counts as a bad nibble. + // + FAPI_ASSERT(l_bad_nibbles.size() <= MAX_BAD_NIBBLES, + fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), + "port %d DP%d too many bad nibbles %d", + l_which_port, l_which_dp, l_bad_nibbles.size()); + + // If we have one bad nibble, assert that we have one or fewer bad bits + if (l_bad_nibbles.size() == MAX_BAD_NIBBLES) + { + FAPI_ASSERT(l_bad_bits <= MAX_BAD_BITS, + fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), + "port %d DP%d bad nibbles %d + %d bad bits", + l_which_port, l_which_dp, l_bad_nibbles.size(), l_bad_bits); + } - // If we have no bad nibbles, assert we have 2 or fewer bad bits. This is a sly bad nibble - // scenario; one of the bits is represents a bad nibble - if (l_bad_nibbles.size() == 0) - { - FAPI_ASSERT(l_bad_bits <= SLY_BAD_BITS, - fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), - "port %d DP%d %d bad bits", - l_which_port, l_which_dp, l_bad_bits); - } - } + // If we have no bad nibbles, assert we have 2 or fewer bad bits. This is a sly bad nibble + // scenario; one of the bits is represents a bad nibble + if (l_bad_nibbles.size() == 0) + { + FAPI_ASSERT(l_bad_bits <= SLY_BAD_BITS, + fapi2::MSS_DISABLED_BITS().set_TARGET_IN_ERROR(i_target), + "port %d DP%d %d bad bits", + l_which_port, l_which_dp, l_bad_bits); } // We're all done. Clear the bit @@ -2086,5 +2105,182 @@ fapi_try_exit: return fapi2::current_err; } +/// +/// @brief Reset the bad-bits masks for a port +/// @note Read the bad bits from the f/w attributes and stuff them in the +/// appropriate registers. +/// @param[in] i_target the fapi2 target of the port +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if bad bits can be repaired +/// +fapi2::ReturnCode reset_bad_bits( const fapi2::Target& i_target ) +{ + // The magic 10 is because there are 80 bits represented in this attribute, and each element is 8 bits. + // So to get to 80, we need 10 bytes. + uint8_t l_bad_dq[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] = { 0 }; + FAPI_TRY( mss::bad_dq_bitmap(i_target, &(l_bad_dq[0][0][0])) ); + + return reset_bad_bits_helper(i_target, l_bad_dq); + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Reset the bad-bits masks for a port - helper for ease of testing +/// @note Read the bad bits from the f/w attributes and stuff them in the +/// appropriate registers. +/// @note The magic 10 is because there are 80 bits represented in this attribute, and each element is 8 bits. +/// So to get to 80, we need 10 bytes. +/// @param[in] i_target the fapi2 target of the port +/// @param[in] i_bad_dq array representing the data from the bad dq bitmap +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if bad bits can be repaired +/// +fapi2::ReturnCode reset_bad_bits_helper( const fapi2::Target& i_target, + const uint8_t i_bad_dq[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] ) +{ + typedef dp16Traits TT; + + std::vector l_ranks; + + // Loop over the ranks, makes things simpler than looping over the DIMM (surprisingly) + FAPI_TRY( rank::ranks(i_target, l_ranks) ); + + for (const auto& r : l_ranks) + { + uint64_t l_rp = 0; + uint64_t l_dimm_index = rank::get_dimm_from_rank(r); + FAPI_TRY( mss::rank::get_pair_from_rank(i_target, r, l_rp) ); + + FAPI_INF("processing bad bits for DIMM%d rank %d (%d) rp %d", l_dimm_index, mss::index(r), r, l_rp); + + // We loop over the disable registers for this rank pair, and shift the bits from the attribute + // array in to the disable registers + { + // This is the set of registers for this rank pair. It is indexed by DP. We know the bad bits + // [0] and [1] are the 16 bits for DP0, [2],[3] are the 16 for DP1, etc. + const auto& l_addrs = TT::BIT_DISABLE_REG[l_rp]; + + // This is the section of the attribute we need to use. The result is an array of 10 bytes. + const uint8_t* l_bad_bits = &(i_bad_dq[l_dimm_index][mss::index(r)][0]); + + // Where in the array we are, incremented by two for every DP + size_t l_byte_index = 0; + + for (const auto& a : l_addrs) + { + uint64_t l_register_value = (l_bad_bits[l_byte_index] << 8) | l_bad_bits[l_byte_index + 1]; + + FAPI_INF("writing %s 0x%0lX value 0x%0lX from 0x%X, 0x%X", + mss::c_str(i_target), a.first, l_register_value, + l_bad_bits[l_byte_index], l_bad_bits[l_byte_index + 1]); + + // TODO RTC: 163674 Only wriiting the DISABLE0 register - not sure what happened to the DQS? + FAPI_TRY( mss::putScom(i_target, a.first, l_register_value) ); + l_byte_index += 2; + } + } + } + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Write disable bits +/// @note This is different than a register write as it writes attributes which +/// cause firmware to act on the disabled bits. +/// @param[in] i_target the fapi2 target of the port +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if bad bits can be repaired +/// +fapi2::ReturnCode record_bad_bits( const fapi2::Target& i_target ) +{ + const auto& l_mcs = mss::find_target(i_target); + uint8_t l_value[PORTS_PER_MCS][MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] = { 0 }; + + // Process the bad bits into an array. We copy these in to their own array + // as it allows the compiler to check indexes where a passed pointer wouldn't + // otherwise do. + uint8_t l_data[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] = { 0 }; + FAPI_TRY( mss::dp16::record_bad_bits_helper(i_target, l_data) ); + + // Read the attribute + FAPI_TRY( mss::bad_dq_bitmap(l_mcs, &(l_value[0][0][0][0])) ); + + // Modify + memcpy( &(l_value[mss::index(i_target)][0][0][0]), &(l_data[0][0][0]), + MAX_DIMM_PER_PORT * MAX_RANK_PER_DIMM * 10 ); + + // Write + FAPI_TRY( FAPI_ATTR_SET(fapi2::ATTR_BAD_DQ_BITMAP, l_mcs, l_value) ); + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Write disable bits - helper for testing +/// @note This is different than a register write as it writes attributes which +/// cause firmware to act on the disabled bits. +/// @param[in] i_target the fapi2 target of the port +/// @param[out] o_bad_dq an array of [MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] containing the attribute information +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if bad bits can be repaired +/// +fapi2::ReturnCode record_bad_bits_helper( const fapi2::Target& i_target, + uint8_t (&o_bad_dq)[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] ) +{ + typedef dp16Traits TT; + + std::vector l_ranks; + + // Loop over the ranks, makes things simpler than looping over the DIMM (surprisingly) + FAPI_TRY( rank::ranks(i_target, l_ranks) ); + + for (const auto& r : l_ranks) + { + uint64_t l_rp = 0; + uint64_t l_dimm_index = rank::get_dimm_from_rank(r); + FAPI_TRY( mss::rank::get_pair_from_rank(i_target, r, l_rp) ); + + FAPI_INF("recording bad bits for DIMM%d rank %d (%d) rp %d", l_dimm_index, mss::index(r), r, l_rp); + + // We loop over the disable registers for this rank pair, and shift the bits from the attribute + // array in to the disable registers + { + // Grab a pointer to our argument simply to make the code a little easier to read + uint8_t* l_bad_bits = &(o_bad_dq[l_dimm_index][mss::index(r)][0]); + + // The values we'll pull from the registers in the scom suckah below. We only read the registers for + // our current rank pair. + std::vector< std::pair< fapi2::buffer, fapi2::buffer > > l_register_value; + + FAPI_TRY( mss::scom_suckah(i_target, TT::BIT_DISABLE_REG[l_rp], l_register_value) ); + + // Where in the array we are, incremented by two for every DP + size_t l_byte_index = 0; + + for (const auto& v : l_register_value) + { + // Grab the left and right bytes from the bad bits register and stick them in the + // nth and nth + 1 bytes of the array + l_bad_bits[l_byte_index] = (v.first & 0xFF00) >> 8; + l_bad_bits[l_byte_index + 1] = v.first & 0x00FF; + + FAPI_DBG("writing %s value 0x%0lX to 0x%X, 0x%X from 0x%016lx", + mss::c_str(i_target), + v.first, + l_bad_bits[l_byte_index], + l_bad_bits[l_byte_index + 1], + v.first); + + // TODO RTC: 163674 Only writing the DISABLE0 register - not sure what happened to the DQS? + l_byte_index += 2; + } + } + } + +fapi_try_exit: + return fapi2::current_err; +} + } // close namespace dp16 } // close namespace mss diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.H b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.H index b243e3bc4..af01193dc 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/dp16.H @@ -171,6 +171,8 @@ class dp16Traits static const std::vector< std::pair > WR_VREF_VALUE_RP2_REG; static const std::vector< std::pair > WR_VREF_VALUE_RP3_REG; + static const std::vector< std::vector> > BIT_DISABLE_REG; + enum { DLL_CNTL_INIT_RXDLL_CAL_RESET = MCA_DDRPHY_DP16_DLL_CNTL0_P0_0_01_INIT_RXDLL_CAL_RESET, @@ -413,7 +415,6 @@ inline fapi2::ReturnCode read_dll_cntl( const fapi2::Target& i_target, fapi2: fapi_try_exit: return fapi2::current_err; - } /// @@ -440,7 +441,6 @@ inline fapi2::ReturnCode write_dll_cntl( const fapi2::Target& i_target, const fapi_try_exit: return fapi2::current_err; - } /// @@ -846,13 +846,23 @@ fapi2::ReturnCode reset_data_bit_enable( const fapi2::Target& i_target ); /// /// @brief Reset the bad-bits masks for a port -/// @tparam T the fapi2::TargetType -/// @tparam TT the target traits -/// @param[in] i_target the target (MCA or MBA?) +/// @note Read the bad bits from the f/w attributes and stuff them in the +/// appropriate registers. +/// @param[in] i_target the fapi2 target of the port /// @return FAPI2_RC_SUCCESS if and only if ok /// -template< fapi2::TargetType T, typename TT = dp16Traits > -inline fapi2::ReturnCode reset_bad_bits(const fapi2::Target& i_target); +fapi2::ReturnCode reset_bad_bits(const fapi2::Target& i_target); + +/// +/// @brief Reset the bad-bits masks for a port - helper for testing +/// @note The magic 10 is because there are 80 bits represented in this attribute, and each element is 8 bits. +/// So to get to 80, we need 10 bytes. +/// @param[in] i_target the fapi2 target of the port +/// @param[in] i_bad_dq array representing the data from the bad dq bitmap +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +fapi2::ReturnCode reset_bad_bits_helper(const fapi2::Target& i_target, + const uint8_t i_bad_dq[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10]); /// /// @brief Configure the DP16 io_tx config0 registers @@ -929,18 +939,6 @@ fapi2::ReturnCode reset_write_clock_enable( const fapi2::Target& i_target ); -/// -/// @brief Reset the bad-bits masks for a port -/// @tparam T the fapi2::TargetType -/// @param[in] i_target the target (MCA or MBA?) -/// @return FAPI2_RC_SUCCESS if and only if ok -/// -inline fapi2::ReturnCode reset_bad_bits( const fapi2::Target& i_target) -{ - // Note: We need to do this ... BRS - return fapi2::FAPI2_RC_SUCCESS; -} - /// /// @brief Configure the DP16 io_tx config0 registers /// @param[in] i_target a MCBIST target @@ -1712,7 +1710,7 @@ template< fapi2::TargetType T, typename TT = dp16Traits > fapi2::ReturnCode reset_read_delay_offset_registers( const fapi2::Target& i_target ); /// -/// @brief Process disable bits and setup controller as necessary +/// @brief Process disable bits /// @param[in] i_target the fapi2 target of the port /// @param[in] i_dimm the fapi2 target of the failed DIMM /// @param[in] i_rp the rank pairs to check as a bit-map @@ -1722,6 +1720,26 @@ fapi2::ReturnCode process_bad_bits( const fapi2::Target& const fapi2::Target& i_dimm, const uint64_t l_rp ); +/// +/// @brief Write disable bits +/// @note This is different than a register write as it writes attributes which +/// cause firmware to act on the disabled bits. +/// @param[in] i_target the fapi2 target of the port +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if bad bits can be repaired +/// +fapi2::ReturnCode record_bad_bits( const fapi2::Target& i_target ); + +/// +/// @brief Write disable bits - helper for testing +/// @note This is different than a register write as it writes attributes which +/// cause firmware to act on the disabled bits. +/// @param[in] i_target the fapi2 target of the port +/// @param[out] o_bad_dq an array of [MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] containing the attribute information +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if bad bits can be repaired +/// +fapi2::ReturnCode record_bad_bits_helper( const fapi2::Target& i_target, + uint8_t (&o_bad_dq)[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][10] ); + } // close namespace dp16 } // close namespace mss diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/utils/conversions.H b/src/import/chips/p9/procedures/hwp/memory/lib/utils/conversions.H index 9de5b3e8c..897d90075 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/utils/conversions.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/utils/conversions.H @@ -111,7 +111,7 @@ inline fapi2::ReturnCode freq_to_ps(const T i_speed_grade, OT& o_tCK_in_ps ) break; default: - FAPI_ERR("Invalid dimm speed grade (MT/s) - %d - provided"); + FAPI_ERR("Invalid dimm speed grade (MT/s) - %d - provided", i_speed_grade); return fapi2::FAPI2_RC_INVALID_PARAMETER; break; } @@ -149,7 +149,7 @@ fapi2::ReturnCode ps_to_freq(const T i_time_in_ps, OT& o_speed_grade) break; default: - FAPI_ERR("Invalid clock period (tCK) - %d - provided"); + FAPI_ERR("Invalid clock period (tCK) - %d - provided", i_time_in_ps); return fapi2::FAPI2_RC_INVALID_PARAMETER; break; } diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C index c4d02ddce..b1a0401df 100644 --- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C +++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C @@ -55,6 +55,8 @@ extern "C" const uint16_t i_special_training, const uint8_t i_abort_on_error) { + // Keep track of the last error seen by a port + fapi2::ReturnCode l_port_error = fapi2::FAPI2_RC_SUCCESS; fapi2::buffer l_cal_steps_enabled = i_special_training; FAPI_INF("Start draminit training"); @@ -93,6 +95,10 @@ extern "C" for( const auto& p : mss::find_targets(i_target)) { + // Keep track of the last error seen by a rank pair + fapi2::ReturnCode l_rank_pair_error = fapi2::FAPI2_RC_SUCCESS; + + mss::ccs::program l_program; // Setup a series of register probes which we'll see during the polling loop @@ -195,10 +201,25 @@ extern "C" { goto fapi_try_exit; } + + // Keep tack of the last cal error we saw. + l_rank_pair_error = fapi2::current_err; } } + + // Once we've trained all the rank pairs we can record the bad bits in the attributes if we have an error + // This error is the most recent error seen on a port, too, so we keep track of that. + if (l_rank_pair_error != fapi2::FAPI2_RC_SUCCESS) + { + FAPI_TRY( mss::dp16::record_bad_bits(p) ); + l_port_error = l_rank_pair_error; + } } + // So we're calibrated the entire port. If we're here either we didn't have any errors or the last error + // seen on a port is the error for this entire controller. + fapi2::current_err = l_port_error; + fapi_try_exit: FAPI_INF("End draminit training"); return fapi2::current_err; diff --git a/src/import/chips/p9/procedures/xml/attribute_info/memory_dq_attributes.xml b/src/import/chips/p9/procedures/xml/attribute_info/memory_dq_attributes.xml index 6dab3219e..673346c8c 100644 --- a/src/import/chips/p9/procedures/xml/attribute_info/memory_dq_attributes.xml +++ b/src/import/chips/p9/procedures/xml/attribute_info/memory_dq_attributes.xml @@ -22,21 +22,40 @@ + - - ATTR_MSS_VPD_DQ_MAP - TARGET_TYPE_MCS - - [PORT][Dimm DQ PIN] The map from the Dual Inline Memory Module (DIMM) Data (DQ) Pin to the Module Package Data (DQ) Pinout - - - uint8 - - - 0 - 144 - vpd_dq_map - 2 72 - + + + ATTR_MSS_VPD_DQ_MAP + TARGET_TYPE_MCS + + [PORT][Dimm DQ PIN] The map from the Dual Inline Memory Module (DIMM) Data (DQ) Pin to the Module Package Data (DQ) Pinout + + + uint8 + + + 0 + 144 + vpd_dq_map + 2 72 + + + + ATTR_BAD_DQ_BITMAP + TARGET_TYPE_MCS + + Bad DQ bitmap from a controller point of view. + The data is a 10 byte bitmap for each of 4 possible ranks. + The bad DQ data is stored in NVRAM, and it is stored in a special format + translated to a DIMM Connector point of view. + All of these details are hidden from the user of this attribute. + + uint8 + 2 2 4 10 + + + + diff --git a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml index bda99efa7..8e009db08 100644 --- a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml +++ b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_draminit_training.xml @@ -104,12 +104,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -129,12 +129,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -154,12 +154,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -179,12 +179,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -204,13 +204,13 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS + TARGET_WITH_REGISTERS TARGET_TYPE_MCA - TARGET_IN_ERROR TARGET_IN_ERROR @@ -229,12 +229,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -254,12 +254,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -279,12 +279,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -304,12 +304,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -329,12 +329,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -354,12 +354,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA @@ -379,12 +379,12 @@ RANKGROUP_POSITION REG_FFDC_MSS_DRAMINIT_TRAINING_FAILURE_DISABLE_REGS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA REG_FFDC_MSS_DRAMINIT_TRAINING_ERROR_STATUS - TARGET_IN_ERROR + TARGET_WITH_REGISTERS TARGET_TYPE_MCA -- cgit v1.2.1