diff options
Diffstat (limited to 'src/import/chips/p9/procedures')
7 files changed, 1113 insertions, 6 deletions
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H index 1ba343e68..1bd25dda8 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H @@ -109,6 +109,7 @@ inline fapi2::ReturnCode set_hwms( const fapi2::Target<T>& i_target, mss::ecc::hwms::set_chipmark(l_buffer, i_galois); mss::ecc::hwms::set_confirmed(l_buffer, mss::YES); + mss::ecc::hwms::set_exit_1(l_buffer, mss::YES); } FAPI_TRY( mss::ecc::hwms::write(i_target, i_rank, l_buffer) ); @@ -189,11 +190,46 @@ fapi_try_exit: } /// +/// @brief Query Hardware Marks +/// @tparam T the fapi2::TargetType - derived +/// @param[in] i_target the fapi2 target +/// @param[out] o_marks vector of Galois codes of any marks set +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if ok +/// @note no rank information is returned +/// +template< fapi2::TargetType T > +inline fapi2::ReturnCode get_hw_marks( const fapi2::Target<T>& i_target, + std::vector<uint64_t>& o_marks ) +{ + fapi2::buffer<uint64_t> l_buffer; + uint64_t l_galois = 0; + auto l_confirmed = mss::states::NO; + + o_marks.clear(); + + for (uint64_t l_rank = 0; l_rank < MAX_MRANK_PER_PORT; ++l_rank) + { + FAPI_TRY( get_hwms(i_target, l_rank, l_galois, l_confirmed) ); + + if (l_confirmed == mss::states::YES) + { + o_marks.push_back(l_galois); + } + } + + return fapi2::FAPI2_RC_SUCCESS; + +fapi_try_exit: + return fapi2::current_err; +} + +/// /// @brief Query Firmware Marks /// @tparam T the fapi2::TargetType - derived /// @param[in] i_target the fapi2 target /// @param[out] o_marks vector of Galois codes of any marks set /// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if ok +/// @note no rank information is returned /// template< fapi2::TargetType T > inline fapi2::ReturnCode get_fw_marks( const fapi2::Target<T>& i_target, @@ -217,6 +253,8 @@ inline fapi2::ReturnCode get_fw_marks( const fapi2::Target<T>& i_target, } } + return 
fapi2::FAPI2_RC_SUCCESS; + fapi_try_exit: return fapi2::current_err; } diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H index 4e763714e..3161b1759 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H @@ -132,6 +132,46 @@ fapi2::ReturnCode symbol_to_dq( const uint8_t i_symbol, uint8_t& o_dq ) return fapi2::FAPI2_RC_SUCCESS; } +/// +/// @brief Return DQ index from a given Galois code +/// @tparam T fapi2 Target Type defaults to TARGET_TYPE_MCA +/// @tparam TT traits type defaults to eccTraits<T> +/// @param[in] i_galois the Galois code +/// @param[out] o_dq DQ index represented by given Galois code +/// @return FAPI2_RC_SUCCESS iff all is ok +/// +template< fapi2::TargetType T = fapi2::TARGET_TYPE_MCA, typename TT = eccTraits<T> > +fapi2::ReturnCode galois_to_dq( const uint8_t i_galois, uint8_t& o_dq ) +{ + uint8_t l_symbol = 0; + + FAPI_TRY( galois_to_symbol<T>(i_galois, l_symbol) ); + FAPI_TRY( symbol_to_dq<T>(l_symbol, o_dq) ); + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Return Galois code from a given DQ index +/// @tparam T fapi2 Target Type defaults to TARGET_TYPE_MCA +/// @tparam TT traits type defaults to eccTraits<T> +/// @param[in] i_dq the DQ index +/// @param[out] o_galois Galois code represented by given DQ index +/// @return FAPI2_RC_SUCCESS iff all is ok +/// +template< fapi2::TargetType T = fapi2::TARGET_TYPE_MCA, typename TT = eccTraits<T> > +fapi2::ReturnCode dq_to_galois( const uint8_t i_dq, uint8_t& o_galois ) +{ + uint8_t l_symbol = 0; + + FAPI_TRY( mss::ecc::dq_to_symbol<T>(i_dq, l_symbol) ); + FAPI_TRY( mss::ecc::symbol_to_galois<T>(l_symbol, o_galois) ); + +fapi_try_exit: + return fapi2::current_err; +} + } // close namespace ecc } // close namespace mss diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C 
b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C index 979ca5eb8..0703fd828 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C @@ -37,6 +37,7 @@ #include <lib/mc/port.H> #include <lib/shared/mss_const.H> #include <lib/utils/scom.H> +#include <lib/ecc/ecc.H> namespace mss { @@ -302,4 +303,510 @@ fapi_try_exit: return fapi2::current_err; } +/// +/// @brief Convert a bitmap from the BAD_DQ_BITMAP attribute to a vector of bad DQ indexes +/// @param[in] i_bad_bits an 8-bit bitmap of bad bits +/// @param[in] i_nibble which nibble of the bitmap to convert +/// @return std::vector of DQ bits marked as bad in the bitmap +/// +std::vector<uint64_t> bad_bit_helper(const uint8_t i_bad_bits, const size_t i_nibble) +{ + std::vector<uint64_t> l_output; + fapi2::buffer<uint8_t> l_bit_buffer(i_bad_bits); + + const size_t l_start = (i_nibble == 0) ? 0 : BITS_PER_NIBBLE; + + for (size_t l_offset = 0; l_offset < BITS_PER_NIBBLE; ++l_offset) + { + if (l_bit_buffer.getBit(l_start + l_offset)) + { + l_output.push_back(l_start + l_offset); + } + } + + return l_output; +} + +/// +/// @brief Place a symbol mark in a Firmware Mark Store register +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the bad DQ bit +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode place_symbol_mark(const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + const uint64_t i_dq) +{ + const auto& l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target); + const auto l_dimm_idx = mss::index(i_target); + const auto l_rank_idx = mss::index(i_rank); + + uint8_t l_galois = 0; + mss::mcbist::address l_addr; + + // For symbol marks, we set the appropriate Firmware Mark Store reg, with the symbol's + // Galois code, mark_type=SYMBOL, mark_region=MRANK, and the address of the DIMM+MRANK + // TODO RTC:165133 Remove static_cast once 
Galois API is updated to accept uint64_t input + FAPI_TRY( mss::ecc::dq_to_galois(static_cast<uint8_t>(i_dq), l_galois) ); + + l_addr.set_dimm(l_dimm_idx).set_master_rank(l_rank_idx); + + FAPI_DBG("Setting firmware symbol mark on rank:%d dq:%d galois:0x%02x", i_rank, i_dq, l_galois); + FAPI_TRY( mss::ecc::set_fwms(l_mca, i_rank, l_galois, mss::ecc::fwms::mark_type::SYMBOL, + mss::ecc::fwms::mark_region::MRANK, l_addr) ); + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Place a chip mark in a Hardware Mark Store register +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode place_chip_mark(const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank) +{ + const auto& l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target); + + uint8_t l_galois = 0; + + // For chip marks, we set the appropriate Hardware Mark Store reg, with the DIMM's + // symbol[0] Galois code, and both confirmed and exit1 bits set + FAPI_TRY( mss::ecc::symbol_to_galois(0, l_galois) ); + + FAPI_DBG("Setting hardware (chip) mark on rank:%d galois:0x%02x", i_rank, l_galois); + FAPI_TRY( mss::ecc::set_hwms(l_mca, i_rank, l_galois) ); + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute, helper function for unit testing +/// Specialization for TARGET_TYPE_DIMM +/// @param[in] i_target the DIMM target +/// @param[in] i_bad_bits the bad bits values from the VPD, for the specified DIMM +/// @param[out] o_repairs_applied 8-bit mask, where a bit set means a rank had repairs applied (bit0-7 = rank0-7) +/// @param[out] o_repairs_exceeded 2-bit mask, where a bit set means a DIMM had more bad bits than could be repaired (bit0-1 = DIMM0-1) +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +// TODO RTC:157753 Template parameters here are Nimbus specific. 
Convert to attribute/trait of TARGET_TYPE_MCA when traits are created. +template<> +fapi2::ReturnCode restore_repairs_helper<fapi2::TARGET_TYPE_DIMM, MAX_RANK_PER_DIMM, BAD_DQ_BYTE_COUNT>( + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint8_t i_bad_bits[MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT], + fapi2::buffer<uint8_t>& o_repairs_applied, + fapi2::buffer<uint8_t>& o_repairs_exceeded) +{ + FAPI_INF("Restore repair marks from bad DQ data"); + + std::vector<uint64_t> l_ranks; + const auto l_dimm_idx = mss::index(i_target); + + FAPI_TRY( mss::rank::ranks(i_target, l_ranks) ); + + // loop through ranks + for (const auto l_rank : l_ranks) + { + const auto l_rank_idx = mss::index(l_rank); + + repair_state_machine<fapi2::TARGET_TYPE_DIMM> l_machine; + + // loop through bytes + for (uint64_t l_byte = 0; l_byte < (MAX_DQ_NIBBLES_X4 / NIBBLES_PER_BYTE); ++l_byte) + { + for (size_t l_nibble = 0; l_nibble < NIBBLES_PER_BYTE; ++l_nibble) + { + const auto l_bad_dq_vector = bad_bit_helper(i_bad_bits[l_rank_idx][l_byte], l_nibble); + FAPI_DBG("Total bad bits on DIMM:%d rank:%d nibble%d: %d", l_dimm_idx, l_rank, (l_byte * NIBBLES_PER_BYTE) + l_nibble, + l_bad_dq_vector.size()); + + // apply repairs and update repair machine state + // if there are no bad bits (l_bad_dq_vector.size() == 0) no action is necessary + if (l_bad_dq_vector.size() == 1) + { + FAPI_TRY( l_machine.one_bad_dq(i_target, l_rank, (l_bad_dq_vector[0] + (l_byte * BITS_PER_BYTE)), + o_repairs_applied, o_repairs_exceeded) ); + } + else if (l_bad_dq_vector.size() > 1) + { + FAPI_TRY( l_machine.multiple_bad_dq(i_target, l_rank, o_repairs_applied, o_repairs_exceeded) ); + } + + // if repairs have been exceeded, we're done + if (o_repairs_exceeded.getBit(l_dimm_idx)) + { + FAPI_INF("Repairs exceeded on DIMM %s", mss::c_str(i_target)); + return fapi2::FAPI2_RC_SUCCESS; + } + } // end loop through nibbles + } // end loop through bytes + } // end loop through ranks + +fapi_try_exit: + return 
fapi2::current_err; +} + +/// +/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute +/// Specialization for TARGET_TYPE_MCA +/// @param[in] i_target A target representing a port +/// @param[out] o_repairs_applied 8-bit mask, where a bit set means a rank had repairs applied (bit0-7 = rank0-7) +/// @param[out] o_repairs_exceeded 2-bit mask, where a bit set means a DIMM had more bad bits than could be repaired (bit0-1 = DIMM0-1) +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode restore_repairs( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target, + fapi2::buffer<uint8_t>& o_repairs_applied, + fapi2::buffer<uint8_t>& o_repairs_exceeded) +{ + uint8_t l_bad_bits[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT]; + + FAPI_TRY( mss::bad_dq_bitmap(i_target, &(l_bad_bits[0][0][0])) ); + + o_repairs_applied = 0; + o_repairs_exceeded = 0; + + for (const auto& l_dimm : mss::find_targets<fapi2::TARGET_TYPE_DIMM>(i_target)) + { + FAPI_TRY( (restore_repairs_helper<fapi2::TARGET_TYPE_DIMM, MAX_RANK_PER_DIMM, BAD_DQ_BYTE_COUNT>( + l_dimm, l_bad_bits[mss::index(l_dimm)], o_repairs_applied, o_repairs_exceeded)) ); + } + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Set a new state in the repair state machine +/// @tparam T, the fapi2 target type of the DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_state shared pointer to the new state to set +/// +template< fapi2::TargetType T > +void repair_state<T>::set_state(repair_state_machine<T>& io_machine, std::shared_ptr<repair_state<T>> i_state) +{ + io_machine.update_state(i_state); +} + +/// +/// @brief Perform a repair for a single bad DQ bit in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the DQ bit index +/// @param[in,out] io_repairs_applied 8-bit mask, where a 
bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode no_fails<fapi2::TARGET_TYPE_DIMM>::one_bad_dq(repair_state_machine<fapi2::TARGET_TYPE_DIMM>& + io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // place a symbol mark + FAPI_TRY( place_symbol_mark(i_target, i_rank, i_dq) ); + io_repairs_applied.setBit(i_rank); + { + const auto new_state = std::make_shared<symbol_mark_only<fapi2::TARGET_TYPE_DIMM>>(); + set_state(io_machine, new_state); + } +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Perform a repair for multiple bad DQ bits in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode no_fails<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq(repair_state_machine<fapi2::TARGET_TYPE_DIMM>& + io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // place a chip mark + FAPI_TRY( place_chip_mark(i_target, i_rank) ); + io_repairs_applied.setBit(i_rank); + { + const auto new_state = std::make_shared<chip_mark_only<fapi2::TARGET_TYPE_DIMM>>(); + set_state(io_machine, new_state); + } +fapi_try_exit: + return fapi2::current_err; +} + 
+/// +/// @brief Perform a repair for a single bad DQ bit in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the DQ bit index +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode symbol_mark_only<fapi2::TARGET_TYPE_DIMM>::one_bad_dq(repair_state_machine<fapi2::TARGET_TYPE_DIMM>& + io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // leave an unrepaired DQ + const auto new_state = std::make_shared<symbol_mark_plus_unrepaired_dq<fapi2::TARGET_TYPE_DIMM>>(); + set_state(io_machine, new_state); + return fapi2::FAPI2_RC_SUCCESS; +} + +/// +/// @brief Perform a repair for multiple bad DQ bits in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode symbol_mark_only<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq( + repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // place a chip mark + FAPI_TRY( 
place_chip_mark(i_target, i_rank) ); + io_repairs_applied.setBit(i_rank); + { + const auto new_state = std::make_shared<chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>>(); + set_state(io_machine, new_state); + } +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Perform a repair for a single bad DQ bit in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the DQ bit index +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode symbol_mark_plus_unrepaired_dq<fapi2::TARGET_TYPE_DIMM>::one_bad_dq( + repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // repairs exceeded + io_repairs_exceeded.setBit(mss::index(i_target)); + return fapi2::FAPI2_RC_SUCCESS; +} + +/// +/// @brief Perform a repair for multiple bad DQ bits in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode symbol_mark_plus_unrepaired_dq<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq( + repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine, + const 
fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // place a chip mark, but also repairs exceeded + FAPI_TRY( place_chip_mark(i_target, i_rank) ); + io_repairs_applied.setBit(i_rank); + io_repairs_exceeded.setBit(mss::index(i_target)); + { + const auto new_state = std::make_shared<chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>>(); + set_state(io_machine, new_state); + } +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Perform a repair for a single bad DQ bit in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the DQ bit index +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode chip_mark_only<fapi2::TARGET_TYPE_DIMM>::one_bad_dq( + repair_state_machine<fapi2::TARGET_TYPE_DIMM>& + io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // place a symbol mark + FAPI_TRY( place_symbol_mark(i_target, i_rank, i_dq) ); + io_repairs_applied.setBit(i_rank); + { + const auto new_state = std::make_shared<chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>>(); + set_state(io_machine, new_state); + } +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Perform a repair for multiple bad DQ bits in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank 
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode chip_mark_only<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq( + repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // repairs exceeded + io_repairs_exceeded.setBit(mss::index(i_target)); + return fapi2::FAPI2_RC_SUCCESS; +} + +/// +/// @brief Perform a repair for a single bad DQ bit in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the DQ bit index +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>::one_bad_dq( + repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // repairs exceeded + io_repairs_exceeded.setBit(mss::index(i_target)); + return fapi2::FAPI2_RC_SUCCESS; +} + +/// +/// @brief Perform a repair for multiple bad DQ bits in a nibble +/// Specialization for TARGET_TYPE_DIMM +/// @param[in,out] io_machine the repair state machine +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// 
@param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template<> +fapi2::ReturnCode chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq( + repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine, + const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + // repairs exceeded + io_repairs_exceeded.setBit(mss::index(i_target)); + return fapi2::FAPI2_RC_SUCCESS; +} + +/// +/// @brief Perform a repair for a single bad DQ bit in a nibble +/// @tparam T, the fapi2 target type of the DIMM +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the DQ bit index +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode repair_state_machine<T>::one_bad_dq(const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + FAPI_TRY( iv_repair_state->one_bad_dq(*this, i_target, i_rank, i_dq, io_repairs_applied, io_repairs_exceeded) ); +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Perform a repair for multiple bad DQ bits in a nibble +/// @tparam T, the fapi2 target type of the DIMM +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] 
io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode repair_state_machine<T>::multiple_bad_dq(const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) +{ + FAPI_TRY( iv_repair_state->multiple_bad_dq(*this, i_target, i_rank, io_repairs_applied, io_repairs_exceeded) ); +fapi_try_exit: + return fapi2::current_err; +} + } // ns mss diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H index 208df154c..190983f1e 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H @@ -46,6 +46,7 @@ #include <lib/utils/scom.H> #include <lib/dimm/rank.H> #include <c_str.H> +#include <lib/mcbist/address.H> namespace mss { @@ -760,6 +761,421 @@ fapi_try_exit: return fapi2::current_err; } +/// +/// @brief Convert a bitmap from the BAD_DQ_BITMAP attribute to a vector of bad DQ indexes +/// @param[in] i_bad_bits an 8-bit bitmap of bad bits +/// @param[in] i_nibble which nibble of the bitmap to convert +/// @return std::vector of DQ bits marked as bad in the bitmap +/// +std::vector<uint64_t> bad_bit_helper(const uint8_t i_bad_bits, const size_t i_nibble); + +/// +/// @brief Place a symbol mark in a Firmware Mark Store register +/// @tparam T, the fapi2 target type of the DIMM (derived) +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @param[in] i_dq the bad DQ bit +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode place_symbol_mark(const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq); + +/// +/// @brief Place a chip mark in a Hardware Mark Store register +/// @tparam T, the fapi2 target 
type of the DIMM (derived) +/// @param[in] i_target the DIMM target +/// @param[in] i_rank the rank +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode place_chip_mark(const fapi2::Target<T>& i_target, + const uint64_t i_rank); + +// Forward declaration for use in repair_state classes +template< fapi2::TargetType T > +class repair_state_machine; + +/// +/// @class mss::repair_state +/// @brief A class for keeping track of bad bit repair states in a repair_state_machine +/// @tparam T, the fapi2 target type of the DIMM +/// @note this is a base class +/// +template< fapi2::TargetType T > +class repair_state +{ + public: + /// @brief default constructor + repair_state() = default; + /// @brief default destructor + virtual ~repair_state() = default; + + /// + /// @brief Perform a repair for a single bad DQ bit in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in] i_dq the DQ bit index + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + virtual fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) = 0; + + /// + /// @brief Perform a repair for multiple bad DQ bits in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had 
more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + virtual fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) = 0; + + protected: + /// + /// @brief Set a new state in the repair state machine + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_state shared pointer to the new state to set + /// + void set_state(repair_state_machine<T>& io_machine, std::shared_ptr<repair_state<T>> i_state); +}; + +/// +/// @class mss::no_fails +/// @brief repair_state class for no fails (no marks applied) +/// @tparam T, the fapi2 target type of the DIMM +/// +template< fapi2::TargetType T > +class no_fails : public repair_state<T> +{ + public: + /// @brief default constructor + no_fails() = default; + /// @brief default destructor + ~no_fails() = default; + + /// + /// @brief Perform a repair for a single bad DQ bit in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in] i_dq the DQ bit index + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; + + /// + /// @brief Perform a repair for multiple bad DQ bits in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in,out] 
io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; +}; + +/// +/// @class mss::symbol_mark_only +/// @brief repair_state class for when only a symbol mark has been used +/// @tparam T, the fapi2 target type of the DIMM +/// +template< fapi2::TargetType T > +class symbol_mark_only : public repair_state<T> +{ + public: + /// @brief default constructor + symbol_mark_only() = default; + /// @brief default destructor + ~symbol_mark_only() = default; + + /// + /// @brief Perform a repair for a single bad DQ bit in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in] i_dq the DQ bit index + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; + + /// + /// @brief Perform a repair for multiple bad DQ bits in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// 
@param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; +}; + +/// +/// @class mss::symbol_mark_plus_unrepaired_dq +/// @brief repair_state class for when only a symbol mark has been used, and one DQ bit remains unrepaired +/// @tparam T, the fapi2 target type of the DIMM +/// +template< fapi2::TargetType T > +class symbol_mark_plus_unrepaired_dq : public repair_state<T> +{ + public: + /// @brief default constructor + symbol_mark_plus_unrepaired_dq() = default; + /// @brief default destructor + ~symbol_mark_plus_unrepaired_dq() = default; + + /// + /// @brief Perform a repair for a single bad DQ bit in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in] i_dq the DQ bit index + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; + + /// + /// @brief Perform a repair for multiple bad DQ bits in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + 
/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; +}; + +/// +/// @class mss::chip_mark_only +/// @brief repair_state class for when only a chip mark has been used +/// @tparam T, the fapi2 target type of the DIMM +/// +template< fapi2::TargetType T > +class chip_mark_only : public repair_state<T> +{ + public: + /// @brief default constructor + chip_mark_only() = default; + /// @brief default destructor + ~chip_mark_only() = default; + + /// + /// @brief Perform a repair for a single bad DQ bit in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in] i_dq the DQ bit index + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; + + /// + /// @brief Perform a repair for multiple bad DQ bits in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad 
bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; +}; + +/// +/// @class mss::chip_and_symbol_mark +/// @brief repair_state class for when both a chip mark and a symbol mark have been used +/// @tparam T, the fapi2 target type of the DIMM +/// +template< fapi2::TargetType T > +class chip_and_symbol_mark : public repair_state<T> +{ + public: + /// @brief default constructor + chip_and_symbol_mark() = default; + /// @brief default destructor + ~chip_and_symbol_mark() = default; + + /// + /// @brief Perform a repair for a single bad DQ bit in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in] i_dq the DQ bit index + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; + + /// + /// @brief Perform a repair for multiple bad DQ bits in a nibble + /// @param[in,out] io_machine the repair state machine + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if 
and only if ok + /// + fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine, + const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded) override; +}; + +/// +/// @class mss::repair_state_machine +/// @brief state machine class used in restore_repairs_helper +/// @tparam T, the fapi2 target type of the DIMM +/// +template< fapi2::TargetType T > +class repair_state_machine +{ + public: + /// @brief constructor + repair_state_machine() + : iv_repair_state(std::make_shared<no_fails<T>>()) {} + + /// @brief default destructor + ~repair_state_machine() = default; + + /// + /// @brief Perform a repair for a single bad DQ bit in a nibble + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in] i_dq the DQ bit index + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode one_bad_dq(const fapi2::Target<T>& i_target, + const uint64_t i_rank, + const uint64_t i_dq, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded); + + /// + /// @brief Perform a repair for multiple bad DQ bits in a nibble + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the rank + /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied + /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired + /// @return FAPI2_RC_SUCCESS if and only if ok + /// + fapi2::ReturnCode multiple_bad_dq(const fapi2::Target<T>& i_target, + const uint64_t i_rank, + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded); + + /// + /// 
@brief Update the state of the state machine + /// @param[in] i_state shared pointer to the new state + /// + void update_state(std::shared_ptr<repair_state<T>> i_state) + { + iv_repair_state = i_state; + } + + private: + std::shared_ptr<repair_state<T>> iv_repair_state; +}; + +// TODO RTC: 157753 tparam R can be pulled from an MCA trait once we have it +/// +/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute, helper function for unit testing +/// @tparam T, the fapi2 target type of the DIMM (derived) +/// @tparam R the maximum rank per DIMM +/// @tparam B the number of bytes per rank in the bad_dq_bitmap attribute +/// @param[in] i_target A target representing a DIMM +/// @param[in] i_bad_bits the bad bits values from the VPD, for the specified DIMM +/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied +/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template< fapi2::TargetType T, uint64_t R, uint64_t B > +fapi2::ReturnCode restore_repairs_helper( const fapi2::Target<T>& i_target, + const uint8_t i_bad_bits[R][B], + fapi2::buffer<uint8_t>& io_repairs_applied, + fapi2::buffer<uint8_t>& io_repairs_exceeded); + +/// +/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute +/// @tparam T, the fapi2 target type of the port (derived) +/// @param[in] i_target A target representing a port +/// @param[out] o_repairs_applied bit mask, where a bit set means a rank had repairs applied (bit0 = rank0, etc) +/// @param[out] o_repairs_exceeded bit mask, where a bit set means a DIMM had more bad bits than could be repaired (bit0 = DIMM0 etc) +/// @return FAPI2_RC_SUCCESS if and only if ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode restore_repairs( const fapi2::Target<T>& i_target, + fapi2::buffer<uint8_t>& o_repairs_applied, + 
fapi2::buffer<uint8_t>& o_repairs_exceeded); + }// mss #endif diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H index e37d07576..5eee66a56 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H @@ -51,6 +51,7 @@ enum sizes MAX_DIMM_PER_PORT = 2, MAX_RANK_PER_DIMM = 4, NIBBLES_PER_DP = 4, + NIBBLES_PER_BYTE = 2, BITS_PER_NIBBLE = 4, BITS_PER_BYTE = 8, BITS_PER_DQS = 2, ///< Differential clock pair @@ -65,6 +66,7 @@ enum sizes MAX_DQ_NIBBLES_X4 = MAX_DQ_BITS / BITS_PER_NIBBLE, ///< For x4's there are 18 DQ nibbles for DQ 72 bits MARK_STORE_COUNT = 8, ///< Elements in a VPD mark/store array + BAD_DQ_BYTE_COUNT = 10, ///< Elements in a BAD_DQ_BITMAP attribute array BYTES_PER_GB = 1000000000, ///< Multiplier to go from GB to B T_PER_MT = 1000000, ///< Multiplier to go from MT/s to T/s diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C index 8efc158b4..0bdb4de8f 100644 --- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C +++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C @@ -37,21 +37,25 @@ #include <p9_mss_memdiag.H> #include <lib/utils/poll.H> +#include <lib/utils/find.H> #include <lib/utils/count_dimm.H> #include <lib/mcbist/address.H> #include <lib/mcbist/memdiags.H> #include <lib/mcbist/mcbist.H> +#include <lib/mc/port.H> +#include <lib/ecc/ecc.H> using fapi2::TARGET_TYPE_MCBIST; using fapi2::TARGET_TYPE_SYSTEM; +using fapi2::TARGET_TYPE_MCA; extern "C" { -/// -/// @brief Pattern test the DRAM -/// @param[in] i_target the McBIST of the ports of the dram you're training -/// @return FAPI2_RC_SUCCESS iff ok -/// + /// + /// @brief Pattern test the DRAM + /// @param[in] i_target the McBIST of the ports of the dram you're training + /// @return FAPI2_RC_SUCCESS iff ok + /// 
fapi2::ReturnCode p9_mss_memdiag( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target ) { FAPI_INF("Start memdiag"); @@ -67,6 +71,54 @@ extern "C" uint8_t is_sim = false; FAPI_TRY( FAPI_ATTR_GET(fapi2::ATTR_IS_SIMULATION, fapi2::Target<TARGET_TYPE_SYSTEM>(), is_sim) ); + // Read the bad_dq_bitmap attribute and place corresponding symbol and chip marks + for (const auto& l_mca : mss::find_targets<TARGET_TYPE_MCA>(i_target)) + { + fapi2::buffer<uint8_t> l_repairs_applied; + fapi2::buffer<uint8_t> l_repairs_exceeded; + std::vector<uint64_t> l_ranks; + + FAPI_TRY( mss::restore_repairs( l_mca, l_repairs_applied, l_repairs_exceeded) ); + + // assert if we have exceeded the allowed repairs + for (const auto& l_dimm : mss::find_targets<fapi2::TARGET_TYPE_DIMM>(l_mca)) + { + FAPI_ASSERT( !(l_repairs_exceeded.getBit(mss::index(l_dimm))), + fapi2::MSS_MEMDIAGS_REPAIRS_EXCEEDED().set_TARGET(l_dimm), + "p9_mss_memdiag bad bit repairs exceeded %s", mss::c_str(l_dimm) ); + } + +#ifdef __HOSTBOOT_MODULE + // assert if both chip and symbol marks exist for any given rank + FAPI_TRY( mss::rank::ranks(l_mca, l_ranks) ); + + for (const auto l_rank : l_ranks) + { + if (l_repairs_applied.getBit(l_rank)) + { + uint64_t l_galois = 0; + mss::states l_confirmed = mss::NO; + // check for chip mark in hardware mark store + FAPI_TRY( mss::ecc::get_hwms(l_mca, l_rank, l_galois, l_confirmed) ); + + if (l_confirmed) + { + auto l_type = mss::ecc::fwms::mark_type::CHIP; + auto l_region = mss::ecc::fwms::mark_region::DISABLED; + auto l_addr = mss::mcbist::address(0); + // check for symbol mark in firmware mark store + FAPI_TRY( mss::ecc::get_fwms(l_mca, l_rank, l_galois, l_type, l_region, l_addr) ); + + FAPI_ASSERT( l_region == mss::ecc::fwms::mark_region::DISABLED, + fapi2::MSS_MEMDIAGS_CHIPMARK_AND_SYMBOLMARK().set_TARGET(l_mca).set_RANK(l_rank), + "p9_mss_memdiag both chip mark and symbol mark on rank %d: %s", l_rank, mss::c_str(l_mca) ); + } + } + } + +#endif + } + // We start the sf_init (write 
0's) and it'll tickle the MCBIST complete FIR. PRD will see that // and start a background scrub. FAPI_TRY( memdiags::sf_init(i_target, mss::mcbist::PATTERN_0) ); diff --git a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_memdiags.xml b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_memdiags.xml index 0a7c4b183..80a6b90ee 100644 --- a/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_memdiags.xml +++ b/src/import/chips/p9/procedures/xml/error_info/p9_memory_mss_memdiags.xml @@ -5,7 +5,7 @@ <!-- --> <!-- OpenPOWER HostBoot Project --> <!-- --> -<!-- Contributors Listed Below - COPYRIGHT 2016 --> +<!-- Contributors Listed Below - COPYRIGHT 2016,2017 --> <!-- [+] International Business Machines Corp. --> <!-- --> <!-- --> @@ -61,6 +61,27 @@ <scomRegister>MCBIST_MCBEA3Q</scomRegister> </registerFfdc> + <registerFfdc> + <id>REG_FFDC_MSS_MARK_REPAIR_FAILURE</id> + <scomRegister>MCA_HWMS0</scomRegister> + <scomRegister>MCA_WDF_HWMS1</scomRegister> + <scomRegister>MCA_HWMS2</scomRegister> + <scomRegister>MCA_HWMS3</scomRegister> + <scomRegister>MCA_HWMS4</scomRegister> + <scomRegister>MCA_HWMS5</scomRegister> + <scomRegister>MCA_HWMS6</scomRegister> + <scomRegister>MCA_HWMS7</scomRegister> + + <scomRegister>MCA_FWMS0</scomRegister> + <scomRegister>MCA_WREITE_FWMS1</scomRegister> + <scomRegister>MCA_FWMS2</scomRegister> + <scomRegister>MCA_FWMS3</scomRegister> + <scomRegister>MCA_FWMS4</scomRegister> + <scomRegister>MCA_FWMS5</scomRegister> + <scomRegister>MCA_FWMS6</scomRegister> + <scomRegister>MCA_FWMS7</scomRegister> + </registerFfdc> + <hwpError> <rc>RC_MSS_MEMDIAGS_ERROR_IN_LAST_PATTERN</rc> <description>An error was caused by the last MCBIST pattern</description> @@ -246,5 +267,36 @@ </callout> </hwpError> + <hwpError> + <rc>RC_MSS_MEMDIAGS_REPAIRS_EXCEEDED</rc> + <description>A mark repair operation failed to repair enough bad bits</description> + <ffdc>TARGET</ffdc> + <collectRegisterFfdc> + 
<id>REG_FFDC_MSS_MARK_REPAIR_FAILURE</id> + <target>TARGET</target> + <targetType>TARGET_TYPE_MCA</targetType> + </collectRegisterFfdc> + <callout> + <target>TARGET</target> + <priority>HIGH</priority> + </callout> + </hwpError> + + <hwpError> + <rc>RC_MSS_MEMDIAGS_CHIPMARK_AND_SYMBOLMARK</rc> + <description>A mark repair operation set both a chipmark and symbol mark on one rank</description> + <ffdc>TARGET</ffdc> + <ffdc>RANK</ffdc> + <collectRegisterFfdc> + <id>REG_FFDC_MSS_MARK_REPAIR_FAILURE</id> + <target>TARGET</target> + <targetType>TARGET_TYPE_MCA</targetType> + </collectRegisterFfdc> + <callout> + <target>TARGET</target> + <priority>HIGH</priority> + </callout> + </hwpError> + </hwpErrors> |