summaryrefslogtreecommitdiffstats
path: root/src/import/chips/p9/procedures/hwp/memory
diff options
context:
space:
mode:
authorLouis Stermole <stermole@us.ibm.com>2017-01-30 10:59:43 -0600
committerDaniel M. Crowell <dcrowell@us.ibm.com>2017-02-28 16:15:19 -0500
commit8a6f2a39aa4f3cf106ee8b78fdc17c59224d4da2 (patch)
treee462c6798f1c8dd720dc191d6641a94b2bdbccce /src/import/chips/p9/procedures/hwp/memory
parent3b44d04006f31b5b88b47e0479d99ea40b030ccf (diff)
downloadtalos-hostboot-8a6f2a39aa4f3cf106ee8b78fdc17c59224d4da2.tar.gz
talos-hostboot-8a6f2a39aa4f3cf106ee8b78fdc17c59224d4da2.zip
Add MSS restore_repairs function
Change-Id: Ie1180d067cbf87f337e2ce74d2a369d29a862cb8 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36025 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Brian R. Silver <bsilver@us.ibm.com> Reviewed-by: STEPHEN GLANCY <sglancy@us.ibm.com> Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com> Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com> Reviewed-by: Jennifer A. Stofer <stofer@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36027 Reviewed-by: Hostboot Team <hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/import/chips/p9/procedures/hwp/memory')
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H38
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H40
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C507
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H416
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H2
-rw-r--r--src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C62
6 files changed, 1060 insertions, 5 deletions
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H
index 1ba343e68..1bd25dda8 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/ecc.H
@@ -109,6 +109,7 @@ inline fapi2::ReturnCode set_hwms( const fapi2::Target<T>& i_target,
mss::ecc::hwms::set_chipmark(l_buffer, i_galois);
mss::ecc::hwms::set_confirmed(l_buffer, mss::YES);
+ mss::ecc::hwms::set_exit_1(l_buffer, mss::YES);
}
FAPI_TRY( mss::ecc::hwms::write(i_target, i_rank, l_buffer) );
@@ -189,11 +190,46 @@ fapi_try_exit:
}
///
+/// @brief Query Hardware Marks - collect the Galois code of every confirmed hardware mark
+/// @tparam T the fapi2::TargetType - derived
+/// @param[in] i_target the fapi2 target
+/// @param[out] o_marks vector of Galois codes of any marks set; cleared on entry, one entry per confirmed mark
+/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if ok, otherwise fapi2::current_err via fapi_try_exit
+/// @note no rank information is returned
+///
+template< fapi2::TargetType T >
+inline fapi2::ReturnCode get_hw_marks( const fapi2::Target<T>& i_target,
+ std::vector<uint64_t>& o_marks )
+{
+ fapi2::buffer<uint64_t> l_buffer; // NOTE(review): never referenced in this function - candidate for removal
+ uint64_t l_galois = 0;
+ auto l_confirmed = mss::states::NO;
+
+ o_marks.clear(); // results from a previous call are discarded before the ranks are scanned
+
+ for (uint64_t l_rank = 0; l_rank < MAX_MRANK_PER_PORT; ++l_rank) // visits ranks 0 .. MAX_MRANK_PER_PORT-1
+ {
+ FAPI_TRY( get_hwms(i_target, l_rank, l_galois, l_confirmed) );
+
+ if (l_confirmed == mss::states::YES) // only confirmed marks are reported
+ {
+ o_marks.push_back(l_galois);
+ }
+ }
+
+ return fapi2::FAPI2_RC_SUCCESS;
+
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
/// @brief Query Firmware Marks
/// @tparam T the fapi2::TargetType - derived
/// @param[in] i_target the fapi2 target
/// @param[out] o_marks vector of Galois codes of any marks set
/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS if ok
+/// @note no rank information is returned
///
template< fapi2::TargetType T >
inline fapi2::ReturnCode get_fw_marks( const fapi2::Target<T>& i_target,
@@ -217,6 +253,8 @@ inline fapi2::ReturnCode get_fw_marks( const fapi2::Target<T>& i_target,
}
}
+ return fapi2::FAPI2_RC_SUCCESS;
+
fapi_try_exit:
return fapi2::current_err;
}
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H
index 4e763714e..3161b1759 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/ecc/galois.H
@@ -132,6 +132,46 @@ fapi2::ReturnCode symbol_to_dq( const uint8_t i_symbol, uint8_t& o_dq )
return fapi2::FAPI2_RC_SUCCESS;
}
+///
+/// @brief Return DQ index from a given Galois code (converted Galois code -> symbol -> DQ)
+/// @tparam T fapi2 Target Type defaults to TARGET_TYPE_MCA
+/// @tparam TT traits type defaults to eccTraits<T> (not referenced in this function's body)
+/// @param[in] i_galois the Galois code
+/// @param[out] o_dq DQ index represented by given Galois code
+/// @return FAPI2_RC_SUCCESS iff all is ok
+///
+template< fapi2::TargetType T = fapi2::TARGET_TYPE_MCA, typename TT = eccTraits<T> >
+fapi2::ReturnCode galois_to_dq( const uint8_t i_galois, uint8_t& o_dq )
+{
+ uint8_t l_symbol = 0; // intermediate symbol produced by the first conversion
+
+ FAPI_TRY( galois_to_symbol<T>(i_galois, l_symbol) );
+ FAPI_TRY( symbol_to_dq<T>(l_symbol, o_dq) );
+
+fapi_try_exit: // also reached on the normal path - there is no separate success return
+ return fapi2::current_err;
+}
+
+///
+/// @brief Return Galois code from a given DQ index (converted DQ -> symbol -> Galois code)
+/// @tparam T fapi2 Target Type defaults to TARGET_TYPE_MCA
+/// @tparam TT traits type defaults to eccTraits<T> (not referenced in this function's body)
+/// @param[in] i_dq the DQ index
+/// @param[out] o_galois Galois code represented by given DQ index
+/// @return FAPI2_RC_SUCCESS iff all is ok
+///
+template< fapi2::TargetType T = fapi2::TARGET_TYPE_MCA, typename TT = eccTraits<T> >
+fapi2::ReturnCode dq_to_galois( const uint8_t i_dq, uint8_t& o_galois )
+{
+ uint8_t l_symbol = 0; // intermediate symbol produced by the first conversion
+
+ FAPI_TRY( mss::ecc::dq_to_symbol<T>(i_dq, l_symbol) );
+ FAPI_TRY( mss::ecc::symbol_to_galois<T>(l_symbol, o_galois) );
+
+fapi_try_exit: // also reached on the normal path - there is no separate success return
+ return fapi2::current_err;
+}
+
} // close namespace ecc
} // close namespace mss
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C
index 979ca5eb8..0703fd828 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.C
@@ -37,6 +37,7 @@
#include <lib/mc/port.H>
#include <lib/shared/mss_const.H>
#include <lib/utils/scom.H>
+#include <lib/ecc/ecc.H>
namespace mss
{
@@ -302,4 +303,510 @@ fapi_try_exit:
return fapi2::current_err;
}
+///
+/// @brief Convert a bitmap from the BAD_DQ_BITMAP attribute to a vector of bad DQ indexes
+/// @param[in] i_bad_bits an 8-bit bitmap of bad bits
+/// @param[in] i_nibble which nibble of the bitmap to convert: 0 starts at bit 0, any other value starts at bit BITS_PER_NIBBLE
+/// @return std::vector of DQ bits marked as bad in the bitmap, as byte-relative positions in ascending order
+///
+std::vector<uint64_t> bad_bit_helper(const uint8_t i_bad_bits, const size_t i_nibble)
+{
+ std::vector<uint64_t> l_output;
+ fapi2::buffer<uint8_t> l_bit_buffer(i_bad_bits); // wrapped so individual bits can be tested with getBit
+
+ const size_t l_start = (i_nibble == 0) ? 0 : BITS_PER_NIBBLE; // first bit position of the selected nibble
+
+ for (size_t l_offset = 0; l_offset < BITS_PER_NIBBLE; ++l_offset)
+ {
+ if (l_bit_buffer.getBit(l_start + l_offset))
+ {
+ l_output.push_back(l_start + l_offset); // position within the byte, not within the nibble
+ }
+ }
+
+ return l_output;
+}
+
+///
+/// @brief Place a symbol mark in a Firmware Mark Store register
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in] i_dq the bad DQ bit (narrowed to uint8_t before the Galois conversion)
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode place_symbol_mark(const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq)
+{
+ const auto& l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target); // the mark is written through the MCA found from the DIMM
+ const auto l_dimm_idx = mss::index(i_target);
+ const auto l_rank_idx = mss::index(i_rank); // used only for the mark address; set_fwms below receives i_rank itself
+
+ uint8_t l_galois = 0;
+ mss::mcbist::address l_addr;
+
+ // For symbol marks, we set the appropriate Firmware Mark Store reg, with the symbol's
+ // Galois code, mark_type=SYMBOL, mark_region=MRANK, and the address of the DIMM+MRANK
+ // TODO RTC:165133 Remove static_cast once Galois API is updated to accept uint64_t input
+ FAPI_TRY( mss::ecc::dq_to_galois(static_cast<uint8_t>(i_dq), l_galois) );
+
+ l_addr.set_dimm(l_dimm_idx).set_master_rank(l_rank_idx);
+
+ FAPI_DBG("Setting firmware symbol mark on rank:%d dq:%d galois:0x%02x", i_rank, i_dq, l_galois);
+ FAPI_TRY( mss::ecc::set_fwms(l_mca, i_rank, l_galois, mss::ecc::fwms::mark_type::SYMBOL,
+ mss::ecc::fwms::mark_region::MRANK, l_addr) );
+
+fapi_try_exit: // also reached on the normal path - there is no separate success return
+ return fapi2::current_err;
+}
+
+///
+/// @brief Place a chip mark in a Hardware Mark Store register
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode place_chip_mark(const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank)
+{
+ const auto& l_mca = mss::find_target<fapi2::TARGET_TYPE_MCA>(i_target); // the mark is written through the MCA found from the DIMM
+
+ uint8_t l_galois = 0;
+
+ // For chip marks, we set the appropriate Hardware Mark Store reg, with the DIMM's
+ // symbol[0] Galois code, and both confirmed and exit1 bits set
+ FAPI_TRY( mss::ecc::symbol_to_galois(0, l_galois) ); // no DQ input: the code for symbol 0 is always used
+
+ FAPI_DBG("Setting hardware (chip) mark on rank:%d galois:0x%02x", i_rank, l_galois);
+ FAPI_TRY( mss::ecc::set_hwms(l_mca, i_rank, l_galois) );
+
+fapi_try_exit: // also reached on the normal path - there is no separate success return
+ return fapi2::current_err;
+}
+
+///
+/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute, helper function for unit testing
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in] i_target the DIMM target
+/// @param[in] i_bad_bits the bad bits values from the VPD, for the specified DIMM
+/// @param[out] o_repairs_applied 8-bit mask, where a bit set means a rank had repairs applied (bit0-7 = rank0-7); not cleared by this helper
+/// @param[out] o_repairs_exceeded 2-bit mask, where a bit set means a DIMM had more bad bits than could be repaired (bit0-1 = DIMM0-1); not cleared by this helper
+/// @return FAPI2_RC_SUCCESS if and only if ok; also returned early, skipping the remaining nibbles and ranks, once this DIMM's repairs-exceeded bit is set
+///
+// TODO RTC:157753 Template parameters here are Nimbus specific. Convert to attribute/trait of TARGET_TYPE_MCA when traits are created.
+template<>
+fapi2::ReturnCode restore_repairs_helper<fapi2::TARGET_TYPE_DIMM, MAX_RANK_PER_DIMM, BAD_DQ_BYTE_COUNT>(
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint8_t i_bad_bits[MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT],
+ fapi2::buffer<uint8_t>& o_repairs_applied,
+ fapi2::buffer<uint8_t>& o_repairs_exceeded)
+{
+ FAPI_INF("Restore repair marks from bad DQ data");
+
+ std::vector<uint64_t> l_ranks;
+ const auto l_dimm_idx = mss::index(i_target);
+
+ FAPI_TRY( mss::rank::ranks(i_target, l_ranks) );
+
+ // loop through ranks
+ for (const auto l_rank : l_ranks)
+ {
+ const auto l_rank_idx = mss::index(l_rank); // indexes i_bad_bits; l_rank itself is what the state machine receives
+
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM> l_machine; // constructed per rank, so repair state does not carry over between ranks
+
+ // loop through bytes
+ // NOTE(review): bound is MAX_DQ_NIBBLES_X4 / NIBBLES_PER_BYTE rather than BAD_DQ_BYTE_COUNT - presumably any remaining bytes hold no DQ data; confirm
+ for (uint64_t l_byte = 0; l_byte < (MAX_DQ_NIBBLES_X4 / NIBBLES_PER_BYTE); ++l_byte)
+ {
+ for (size_t l_nibble = 0; l_nibble < NIBBLES_PER_BYTE; ++l_nibble)
+ {
+ const auto l_bad_dq_vector = bad_bit_helper(i_bad_bits[l_rank_idx][l_byte], l_nibble);
+ FAPI_DBG("Total bad bits on DIMM:%d rank:%d nibble%d: %d", l_dimm_idx, l_rank, (l_byte * NIBBLES_PER_BYTE) + l_nibble,
+ l_bad_dq_vector.size());
+
+ // apply repairs and update repair machine state
+ // if there are no bad bits (l_bad_dq_vector.size() == 0) no action is necessary
+ // a single bad bit is passed on as byte-relative bit position + (l_byte * BITS_PER_BYTE)
+ if (l_bad_dq_vector.size() == 1)
+ {
+ FAPI_TRY( l_machine.one_bad_dq(i_target, l_rank, (l_bad_dq_vector[0] + (l_byte * BITS_PER_BYTE)),
+ o_repairs_applied, o_repairs_exceeded) );
+ }
+ else if (l_bad_dq_vector.size() > 1)
+ {
+ FAPI_TRY( l_machine.multiple_bad_dq(i_target, l_rank, o_repairs_applied, o_repairs_exceeded) );
+ }
+
+ // if repairs have been exceeded, we're done
+ if (o_repairs_exceeded.getBit(l_dimm_idx))
+ {
+ FAPI_INF("Repairs exceeded on DIMM %s", mss::c_str(i_target));
+ return fapi2::FAPI2_RC_SUCCESS; // leaves the whole helper, not just the current rank
+ }
+ } // end loop through nibbles
+ } // end loop through bytes
+ } // end loop through ranks
+
+fapi_try_exit: // also reached on the normal path once every rank has been processed
+ return fapi2::current_err;
+}
+
+///
+/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute
+/// Specialization for TARGET_TYPE_MCA
+/// @param[in] i_target A target representing a port
+/// @param[out] o_repairs_applied 8-bit mask, where a bit set means a rank had repairs applied (bit0-7 = rank0-7)
+/// @param[out] o_repairs_exceeded 2-bit mask, where a bit set means a DIMM had more bad bits than could be repaired (bit0-1 = DIMM0-1)
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode restore_repairs( const fapi2::Target<fapi2::TARGET_TYPE_MCA>& i_target,
+ fapi2::buffer<uint8_t>& o_repairs_applied,
+ fapi2::buffer<uint8_t>& o_repairs_exceeded)
+{
+ uint8_t l_bad_bits[MAX_DIMM_PER_PORT][MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT]; // filled by bad_dq_bitmap below
+
+ FAPI_TRY( mss::bad_dq_bitmap(i_target, &(l_bad_bits[0][0][0])) );
+
+ o_repairs_applied = 0; // both masks are zeroed only after the bitmap read above
+ o_repairs_exceeded = 0;
+
+ for (const auto& l_dimm : mss::find_targets<fapi2::TARGET_TYPE_DIMM>(i_target)) // each DIMM is handled with its own slice of l_bad_bits
+ {
+ FAPI_TRY( (restore_repairs_helper<fapi2::TARGET_TYPE_DIMM, MAX_RANK_PER_DIMM, BAD_DQ_BYTE_COUNT>(
+ l_dimm, l_bad_bits[mss::index(l_dimm)], o_repairs_applied, o_repairs_exceeded)) );
+ }
+
+fapi_try_exit: // also reached on the normal path - there is no separate success return
+ return fapi2::current_err;
+}
+
+///
+/// @brief Set a new state in the repair state machine by forwarding to its update_state()
+/// @tparam T, the fapi2 target type of the DIMM
+/// @param[in,out] io_machine the repair state machine
+/// @param[in] i_state shared pointer to the new state to set
+///
+template< fapi2::TargetType T >
+void repair_state<T>::set_state(repair_state_machine<T>& io_machine, std::shared_ptr<repair_state<T>> i_state)
+{
+ io_machine.update_state(i_state); // the machine, not the state object, holds the current-state pointer
+}
+
+///
+/// @brief Perform a repair for a single bad DQ bit in a nibble: from no_fails, place a symbol mark and move to symbol_mark_only
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in] i_dq the DQ bit index
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired (not modified here)
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode no_fails<fapi2::TARGET_TYPE_DIMM>::one_bad_dq(repair_state_machine<fapi2::TARGET_TYPE_DIMM>&
+ io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // place a symbol mark
+ FAPI_TRY( place_symbol_mark(i_target, i_rank, i_dq) );
+ io_repairs_applied.setBit(i_rank);
+ { // inner scope: presumably keeps new_state's initialization out of the path of FAPI_TRY's jump to fapi_try_exit
+ const auto new_state = std::make_shared<symbol_mark_only<fapi2::TARGET_TYPE_DIMM>>();
+ set_state(io_machine, new_state);
+ }
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Perform a repair for multiple bad DQ bits in a nibble: from no_fails, place a chip mark and move to chip_mark_only
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired (not modified here)
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode no_fails<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq(repair_state_machine<fapi2::TARGET_TYPE_DIMM>&
+ io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // place a chip mark
+ FAPI_TRY( place_chip_mark(i_target, i_rank) );
+ io_repairs_applied.setBit(i_rank);
+ { // inner scope: presumably keeps new_state's initialization out of the path of FAPI_TRY's jump to fapi_try_exit
+ const auto new_state = std::make_shared<chip_mark_only<fapi2::TARGET_TYPE_DIMM>>();
+ set_state(io_machine, new_state);
+ }
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Perform a repair for a single bad DQ bit in a nibble: from symbol_mark_only, no mark is placed; move to symbol_mark_plus_unrepaired_dq
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine
+/// @param[in] i_target the DIMM target (not used here)
+/// @param[in] i_rank the rank (not used here)
+/// @param[in] i_dq the DQ bit index (not used here)
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied (not modified here)
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired (not modified here)
+/// @return FAPI2_RC_SUCCESS if and only if ok; this implementation always returns FAPI2_RC_SUCCESS
+///
+template<>
+fapi2::ReturnCode symbol_mark_only<fapi2::TARGET_TYPE_DIMM>::one_bad_dq(repair_state_machine<fapi2::TARGET_TYPE_DIMM>&
+ io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // leave an unrepaired DQ
+ const auto new_state = std::make_shared<symbol_mark_plus_unrepaired_dq<fapi2::TARGET_TYPE_DIMM>>();
+ set_state(io_machine, new_state); // only the state changes; neither mask is touched
+ return fapi2::FAPI2_RC_SUCCESS;
+}
+
+///
+/// @brief Perform a repair for multiple bad DQ bits in a nibble: from symbol_mark_only, place a chip mark and move to chip_and_symbol_mark
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired (not modified here)
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode symbol_mark_only<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq(
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // place a chip mark
+ FAPI_TRY( place_chip_mark(i_target, i_rank) );
+ io_repairs_applied.setBit(i_rank);
+ { // inner scope: presumably keeps new_state's initialization out of the path of FAPI_TRY's jump to fapi_try_exit
+ const auto new_state = std::make_shared<chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>>();
+ set_state(io_machine, new_state);
+ }
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Perform a repair for a single bad DQ bit in a nibble: from symbol_mark_plus_unrepaired_dq, no mark is placed; repairs are flagged as exceeded
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine (state is left unchanged)
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank (not used here)
+/// @param[in] i_dq the DQ bit index (not used here)
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied (not modified here)
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok; this implementation always returns FAPI2_RC_SUCCESS
+///
+template<>
+fapi2::ReturnCode symbol_mark_plus_unrepaired_dq<fapi2::TARGET_TYPE_DIMM>::one_bad_dq(
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // repairs exceeded
+ io_repairs_exceeded.setBit(mss::index(i_target)); // bit position is the DIMM's index
+ return fapi2::FAPI2_RC_SUCCESS;
+}
+
+///
+/// @brief Perform a repair for multiple bad DQ bits in a nibble: from symbol_mark_plus_unrepaired_dq, place a chip mark, flag repairs exceeded, move to chip_and_symbol_mark
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode symbol_mark_plus_unrepaired_dq<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq(
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // place a chip mark, but also repairs exceeded
+ FAPI_TRY( place_chip_mark(i_target, i_rank) );
+ io_repairs_applied.setBit(i_rank);
+ io_repairs_exceeded.setBit(mss::index(i_target)); // bit position is the DIMM's index
+ { // inner scope: presumably keeps new_state's initialization out of the path of FAPI_TRY's jump to fapi_try_exit
+ const auto new_state = std::make_shared<chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>>();
+ set_state(io_machine, new_state);
+ }
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Perform a repair for a single bad DQ bit in a nibble: from chip_mark_only, place a symbol mark and move to chip_and_symbol_mark
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in] i_dq the DQ bit index
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired (not modified here)
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template<>
+fapi2::ReturnCode chip_mark_only<fapi2::TARGET_TYPE_DIMM>::one_bad_dq(
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM>&
+ io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // place a symbol mark
+ FAPI_TRY( place_symbol_mark(i_target, i_rank, i_dq) );
+ io_repairs_applied.setBit(i_rank);
+ { // inner scope: presumably keeps new_state's initialization out of the path of FAPI_TRY's jump to fapi_try_exit
+ const auto new_state = std::make_shared<chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>>();
+ set_state(io_machine, new_state);
+ }
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Perform a repair for multiple bad DQ bits in a nibble: from chip_mark_only, no mark is placed; repairs are flagged as exceeded
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine (state is left unchanged)
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank (not used here)
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied (not modified here)
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok; this implementation always returns FAPI2_RC_SUCCESS
+///
+template<>
+fapi2::ReturnCode chip_mark_only<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq(
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // repairs exceeded
+ io_repairs_exceeded.setBit(mss::index(i_target)); // bit position is the DIMM's index
+ return fapi2::FAPI2_RC_SUCCESS;
+}
+
+///
+/// @brief Perform a repair for a single bad DQ bit in a nibble: from chip_and_symbol_mark, no mark is placed; repairs are flagged as exceeded
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine (state is left unchanged)
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank (not used here)
+/// @param[in] i_dq the DQ bit index (not used here)
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied (not modified here)
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok; this implementation always returns FAPI2_RC_SUCCESS
+///
+template<>
+fapi2::ReturnCode chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>::one_bad_dq(
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // repairs exceeded
+ io_repairs_exceeded.setBit(mss::index(i_target)); // bit position is the DIMM's index
+ return fapi2::FAPI2_RC_SUCCESS;
+}
+
+///
+/// @brief Perform a repair for multiple bad DQ bits in a nibble: from chip_and_symbol_mark, no mark is placed; repairs are flagged as exceeded
+/// Specialization for TARGET_TYPE_DIMM
+/// @param[in,out] io_machine the repair state machine (state is left unchanged)
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank (not used here)
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied (not modified here)
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok; this implementation always returns FAPI2_RC_SUCCESS
+///
+template<>
+fapi2::ReturnCode chip_and_symbol_mark<fapi2::TARGET_TYPE_DIMM>::multiple_bad_dq(
+ repair_state_machine<fapi2::TARGET_TYPE_DIMM>& io_machine,
+ const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ // repairs exceeded
+ io_repairs_exceeded.setBit(mss::index(i_target)); // bit position is the DIMM's index
+ return fapi2::FAPI2_RC_SUCCESS;
+}
+
+///
+/// @brief Perform a repair for a single bad DQ bit in a nibble by delegating to the machine's current state
+/// @tparam T, the fapi2 target type of the DIMM
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in] i_dq the DQ bit index
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode repair_state_machine<T>::one_bad_dq(const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ FAPI_TRY( iv_repair_state->one_bad_dq(*this, i_target, i_rank, i_dq, io_repairs_applied, io_repairs_exceeded) ); // *this is handed over so the state can install its successor
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
+///
+/// @brief Perform a repair for multiple bad DQ bits in a nibble by delegating to the machine's current state
+/// @tparam T, the fapi2 target type of the DIMM
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode repair_state_machine<T>::multiple_bad_dq(const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded)
+{
+ FAPI_TRY( iv_repair_state->multiple_bad_dq(*this, i_target, i_rank, io_repairs_applied, io_repairs_exceeded) ); // *this is handed over so the state can install its successor
+fapi_try_exit:
+ return fapi2::current_err;
+}
+
} // ns mss
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
index 208df154c..190983f1e 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
@@ -46,6 +46,7 @@
#include <lib/utils/scom.H>
#include <lib/dimm/rank.H>
#include <c_str.H>
+#include <lib/mcbist/address.H>
namespace mss
{
@@ -760,6 +761,421 @@ fapi_try_exit:
return fapi2::current_err;
}
+///
+/// @brief Convert a bitmap from the BAD_DQ_BITMAP attribute to a vector of bad DQ indexes
+/// @param[in] i_bad_bits an 8-bit bitmap of bad bits
+/// @param[in] i_nibble which nibble of the bitmap to convert
+/// @return std::vector of DQ bits marked as bad in the bitmap
+///
+std::vector<uint64_t> bad_bit_helper(const uint8_t i_bad_bits, const size_t i_nibble);
+
+///
+/// @brief Place a symbol mark in a Firmware Mark Store register
+/// @tparam T, the fapi2 target type of the DIMM (derived)
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @param[in] i_dq the bad DQ bit
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode place_symbol_mark(const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq);
+
+///
+/// @brief Place a chip mark in a Hardware Mark Store register
+/// @tparam T, the fapi2 target type of the DIMM (derived)
+/// @param[in] i_target the DIMM target
+/// @param[in] i_rank the rank
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode place_chip_mark(const fapi2::Target<T>& i_target,
+ const uint64_t i_rank);
+
+// Forward declaration for use in repair_state classes
+template< fapi2::TargetType T >
+class repair_state_machine;
+
+///
+/// @class mss::repair_state
+/// @brief A class for keeping track of bad bit repair states in a repair_state_machine
+/// @tparam T, the fapi2 target type of the DIMM
+/// @note this is a base class
+///
+template< fapi2::TargetType T >
+class repair_state
+{
+ public:
+ /// @brief default constructor
+ repair_state() = default;
+ /// @brief default destructor (virtual, since this class declares pure virtual functions for derived states)
+ virtual ~repair_state() = default;
+
+ ///
+ /// @brief Perform a repair for a single bad DQ bit in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in] i_dq the DQ bit index
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ virtual fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) = 0;
+
+ ///
+ /// @brief Perform a repair for multiple bad DQ bits in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ virtual fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) = 0;
+
+ protected:
+ ///
+ /// @brief Set a new state in the repair state machine
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_state pointer to the new state to set
+ ///
+ void set_state(repair_state_machine<T>& io_machine, std::shared_ptr<repair_state<T>> i_state);
+};
+
+///
+/// @class mss::no_fails
+/// @brief repair_state class for no fails (no marks applied)
+/// @tparam T, the fapi2 target type of the DIMM
+///
+template< fapi2::TargetType T >
+class no_fails : public repair_state<T>
+{
+ public:
+ /// @brief default constructor
+ no_fails() = default;
+ /// @brief default destructor
+ ~no_fails() = default;
+
+ ///
+ /// @brief Perform a repair for a single bad DQ bit in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in] i_dq the DQ bit index
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+
+ ///
+ /// @brief Perform a repair for multiple bad DQ bits in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+};
+
+///
+/// @class mss::symbol_mark_only
+/// @brief repair_state class for when only a symbol mark has been used
+/// @tparam T, the fapi2 target type of the DIMM
+///
+template< fapi2::TargetType T >
+class symbol_mark_only : public repair_state<T>
+{
+ public:
+ /// @brief default constructor
+ symbol_mark_only() = default;
+ /// @brief default destructor
+ ~symbol_mark_only() = default;
+
+ ///
+ /// @brief Perform a repair for a single bad DQ bit in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in] i_dq the DQ bit index
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+
+ ///
+ /// @brief Perform a repair for multiple bad DQ bits in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+};
+
+///
+/// @class mss::symbol_mark_plus_unrepaired_dq
+/// @brief repair_state class for when only a symbol mark has been used, and one DQ bit remains unrepaired
+/// @tparam T, the fapi2 target type of the DIMM
+///
+template< fapi2::TargetType T >
+class symbol_mark_plus_unrepaired_dq : public repair_state<T>
+{
+ public:
+ /// @brief default constructor
+ symbol_mark_plus_unrepaired_dq() = default;
+ /// @brief default destructor
+ ~symbol_mark_plus_unrepaired_dq() = default;
+
+ ///
+ /// @brief Perform a repair for a single bad DQ bit in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in] i_dq the DQ bit index
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+
+ ///
+ /// @brief Perform a repair for multiple bad DQ bits in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+};
+
+///
+/// @class mss::chip_mark_only
+/// @brief repair_state class for when only a chip mark has been used
+/// @tparam T, the fapi2 target type of the DIMM
+///
+template< fapi2::TargetType T >
+class chip_mark_only : public repair_state<T>
+{
+ public:
+ /// @brief default constructor
+ chip_mark_only() = default;
+ /// @brief default destructor
+ ~chip_mark_only() = default;
+
+ ///
+ /// @brief Perform a repair for a single bad DQ bit in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in] i_dq the DQ bit index
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+
+ ///
+ /// @brief Perform a repair for multiple bad DQ bits in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+};
+
+///
+/// @class mss::chip_and_symbol_mark
+/// @brief repair_state class for when both a chip mark and a symbol mark have been used
+/// @tparam T, the fapi2 target type of the DIMM
+///
+template< fapi2::TargetType T >
+class chip_and_symbol_mark : public repair_state<T>
+{
+ public:
+ /// @brief default constructor
+ chip_and_symbol_mark() = default;
+ /// @brief default destructor
+ ~chip_and_symbol_mark() = default;
+
+ ///
+ /// @brief Perform a repair for a single bad DQ bit in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in] i_dq the DQ bit index
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode one_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+
+ ///
+ /// @brief Perform a repair for multiple bad DQ bits in a nibble
+ /// @param[in,out] io_machine the repair state machine
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode multiple_bad_dq(repair_state_machine<T>& io_machine,
+ const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded) override;
+};
+
+///
+/// @class mss::repair_state_machine
+/// @brief state machine class used in restore_repairs_helper
+/// @tparam T, the fapi2 target type of the DIMM
+///
+template< fapi2::TargetType T >
+class repair_state_machine
+{
+ public:
+ /// @brief constructor, starts the machine in the no_fails state
+ repair_state_machine()
+ : iv_repair_state(std::make_shared<no_fails<T>>()) {}
+
+ /// @brief default destructor
+ ~repair_state_machine() = default;
+
+ ///
+ /// @brief Perform a repair for a single bad DQ bit in a nibble
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in] i_dq the DQ bit index
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode one_bad_dq(const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ const uint64_t i_dq,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded);
+
+ ///
+ /// @brief Perform a repair for multiple bad DQ bits in a nibble
+ /// @param[in] i_target the DIMM target
+ /// @param[in] i_rank the rank
+ /// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+ /// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+ /// @return FAPI2_RC_SUCCESS if and only if ok
+ ///
+ fapi2::ReturnCode multiple_bad_dq(const fapi2::Target<T>& i_target,
+ const uint64_t i_rank,
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded);
+
+ ///
+ /// @brief Update the state of the state machine
+ /// @param[in] i_state shared pointer to the new state
+ ///
+ void update_state(std::shared_ptr<repair_state<T>> i_state)
+ {
+ iv_repair_state = i_state;
+ }
+
+ private:
+ std::shared_ptr<repair_state<T>> iv_repair_state; ///< current state object; replaced by update_state()
+};
+
+// TODO RTC: 157753 tparam R can be pulled from an MCA trait once we have it
+///
+/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute, helper function for unit testing
+/// @tparam T, the fapi2 target type of the DIMM (derived)
+/// @tparam R the maximum rank per DIMM
+/// @tparam B the number of bytes per rank in the bad_dq_bitmap attribute
+/// @param[in] i_target A target representing a DIMM
+/// @param[in] i_bad_bits the bad bits values from the VPD, for the specified DIMM
+/// @param[in,out] io_repairs_applied 8-bit mask, where a bit set means that rank had repairs applied
+/// @param[in,out] io_repairs_exceeded 2-bit mask, where a bit set means that DIMM had more bad bits than could be repaired
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template< fapi2::TargetType T, uint64_t R, uint64_t B >
+fapi2::ReturnCode restore_repairs_helper( const fapi2::Target<T>& i_target,
+ const uint8_t i_bad_bits[R][B],
+ fapi2::buffer<uint8_t>& io_repairs_applied,
+ fapi2::buffer<uint8_t>& io_repairs_exceeded);
+
+///
+/// @brief Restore symbol and chip marks according to BAD_DQ_BITMAP attribute
+/// @tparam T, the fapi2 target type of the port (derived)
+/// @param[in] i_target A target representing a port
+/// @param[out] o_repairs_applied bit mask, where a bit set means a rank had repairs applied (bit0 = rank0, etc)
+/// @param[out] o_repairs_exceeded bit mask, where a bit set means a DIMM had more bad bits than could be repaired (bit0 = DIMM0 etc)
+/// @return FAPI2_RC_SUCCESS if and only if ok
+///
+template< fapi2::TargetType T >
+fapi2::ReturnCode restore_repairs( const fapi2::Target<T>& i_target,
+ fapi2::buffer<uint8_t>& o_repairs_applied,
+ fapi2::buffer<uint8_t>& o_repairs_exceeded);
+
}// mss
#endif
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
index e37d07576..5eee66a56 100644
--- a/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
+++ b/src/import/chips/p9/procedures/hwp/memory/lib/shared/mss_const.H
@@ -51,6 +51,7 @@ enum sizes
MAX_DIMM_PER_PORT = 2,
MAX_RANK_PER_DIMM = 4,
NIBBLES_PER_DP = 4,
+ NIBBLES_PER_BYTE = 2,
BITS_PER_NIBBLE = 4,
BITS_PER_BYTE = 8,
BITS_PER_DQS = 2, ///< Differential clock pair
@@ -65,6 +66,7 @@ enum sizes
MAX_DQ_NIBBLES_X4 = MAX_DQ_BITS / BITS_PER_NIBBLE, ///< For x4's there are 18 DQ nibbles for DQ 72 bits
MARK_STORE_COUNT = 8, ///< Elements in a VPD mark/store array
+ BAD_DQ_BYTE_COUNT = 10, ///< Elements in a BAD_DQ_BITMAP attribute array
BYTES_PER_GB = 1000000000, ///< Multiplier to go from GB to B
T_PER_MT = 1000000, ///< Multiplier to go from MT/s to T/s
diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C
index 8efc158b4..0bdb4de8f 100644
--- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C
+++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_memdiag.C
@@ -37,21 +37,25 @@
#include <p9_mss_memdiag.H>
#include <lib/utils/poll.H>
+#include <lib/utils/find.H>
#include <lib/utils/count_dimm.H>
#include <lib/mcbist/address.H>
#include <lib/mcbist/memdiags.H>
#include <lib/mcbist/mcbist.H>
+#include <lib/mc/port.H>
+#include <lib/ecc/ecc.H>
using fapi2::TARGET_TYPE_MCBIST;
using fapi2::TARGET_TYPE_SYSTEM;
+using fapi2::TARGET_TYPE_MCA;
extern "C"
{
-///
-/// @brief Pattern test the DRAM
-/// @param[in] i_target the McBIST of the ports of the dram you're training
-/// @return FAPI2_RC_SUCCESS iff ok
-///
+ ///
+ /// @brief Pattern test the DRAM
+ /// @param[in] i_target the McBIST of the ports of the dram you're training
+ /// @return FAPI2_RC_SUCCESS iff ok
+ ///
fapi2::ReturnCode p9_mss_memdiag( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target )
{
FAPI_INF("Start memdiag");
@@ -67,6 +71,54 @@ extern "C"
uint8_t is_sim = false;
FAPI_TRY( FAPI_ATTR_GET(fapi2::ATTR_IS_SIMULATION, fapi2::Target<TARGET_TYPE_SYSTEM>(), is_sim) );
+ // Read the bad_dq_bitmap attribute and place corresponding symbol and chip marks
+ for (const auto& l_mca : mss::find_targets<TARGET_TYPE_MCA>(i_target))
+ {
+ fapi2::buffer<uint8_t> l_repairs_applied;
+ fapi2::buffer<uint8_t> l_repairs_exceeded;
+ std::vector<uint64_t> l_ranks;
+
+ FAPI_TRY( mss::restore_repairs( l_mca, l_repairs_applied, l_repairs_exceeded) );
+
+ // assert if we have exceeded the allowed repairs
+ for (const auto& l_dimm : mss::find_targets<fapi2::TARGET_TYPE_DIMM>(l_mca))
+ {
+ FAPI_ASSERT( !(l_repairs_exceeded.getBit(mss::index(l_dimm))),
+ fapi2::MSS_MEMDIAGS_REPAIRS_EXCEEDED().set_TARGET(l_dimm),
+ "p9_mss_memdiag bad bit repairs exceeded %s", mss::c_str(l_dimm) );
+ }
+
+#ifdef __HOSTBOOT_MODULE
+ // assert if both chip and symbol marks exist for any given rank
+ FAPI_TRY( mss::rank::ranks(l_mca, l_ranks) );
+
+ for (const auto l_rank : l_ranks)
+ {
+ if (l_repairs_applied.getBit(l_rank))
+ {
+ uint64_t l_galois = 0;
+ mss::states l_confirmed = mss::NO;
+ // check for chip mark in hardware mark store
+ FAPI_TRY( mss::ecc::get_hwms(l_mca, l_rank, l_galois, l_confirmed) );
+
+ if (l_confirmed)
+ {
+ auto l_type = mss::ecc::fwms::mark_type::CHIP;
+ auto l_region = mss::ecc::fwms::mark_region::DISABLED;
+ auto l_addr = mss::mcbist::address(0);
+ // check for symbol mark in firmware mark store
+ FAPI_TRY( mss::ecc::get_fwms(l_mca, l_rank, l_galois, l_type, l_region, l_addr) );
+
+ FAPI_ASSERT( l_region == mss::ecc::fwms::mark_region::DISABLED,
+ fapi2::MSS_MEMDIAGS_CHIPMARK_AND_SYMBOLMARK().set_TARGET(l_mca).set_RANK(l_rank),
+ "p9_mss_memdiag both chip mark and symbol mark on rank %d: %s", l_rank, mss::c_str(l_mca) );
+ }
+ }
+ }
+
+#endif
+ }
+
// We start the sf_init (write 0's) and it'll tickle the MCBIST complete FIR. PRD will see that
// and start a background scrub.
FAPI_TRY( memdiags::sf_init(i_target, mss::mcbist::PATTERN_0) );
OpenPOWER on IntegriCloud