From 93192aff25f4df076c810cc3502d1dd066173d5b Mon Sep 17 00:00:00 2001 From: Louis Stermole Date: Tue, 25 Sep 2018 08:44:40 -0500 Subject: Add row repair access functions and attr switches for p9c Change-Id: I07d38475165278a4f0400ee8f5fc38bc5b7b7552 CQ:SW445411 Depends-On: I728a494f91f1f460c0700bbeeca47a0e5739622f Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/66599 Tested-by: Jenkins Server Tested-by: HWSV CI Tested-by: PPE CI Reviewed-by: STEPHEN GLANCY Reviewed-by: ANDRE A. MARIN Tested-by: Hostboot CI Reviewed-by: Jennifer A. Stofer Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/66667 Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW Tested-by: FSP CI Jenkins Reviewed-by: Daniel M. Crowell --- .../procedures/hwp/memory/p9c_mss_ddr4_funcs.C | 1 + .../procedures/hwp/memory/p9c_mss_ddr4_funcs.H | 1 + .../procedures/hwp/memory/p9c_mss_draminit_mc.C | 5 + .../centaur/procedures/hwp/memory/p9c_mss_funcs.H | 1 + .../procedures/hwp/memory/p9c_mss_row_repair.C | 393 ++++++++++++++++++++- .../procedures/hwp/memory/p9c_mss_row_repair.H | 72 ++++ .../procedures/hwp/memory/p9c_mss_row_repair.mk | 2 + .../xml/error_info/p9c_memory_errors.xml | 55 +++ .../fapi2/xml/attribute_info/system_attributes.xml | 3 +- 9 files changed, 527 insertions(+), 6 deletions(-) diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.C b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.C index 1ed21be5d..a3b8190dc 100755 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.C +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.C @@ -4788,6 +4788,7 @@ fapi_try_exit: /// @param[in] i_delay delay associated with this instruction /// @param[in,out] io_instruction_number position in CCS program in which to insert MRS command (will be incremented) /// @return FAPI2_RC_SUCCESS iff successful +/// @note MR should be selected using i_addr.bank with constants from dimmConsts.H fapi2::ReturnCode add_mrs_to_ccs_ddr4(const fapi2::Target& i_target_mba, const access_address i_addr, const uint32_t i_delay, diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.H b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.H index 56525f340..e2d87c0b9 100755 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.H +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_ddr4_funcs.H @@ -219,6 +219,7 @@ fapi2::ReturnCode setup_b_side_ccs(const fapi2::Target& /// @param[in] i_delay delay associated with this instruction /// @param[in,out] io_instruction_number position in CCS program in which to insert MRS command (will be incremented) /// @return FAPI2_RC_SUCCESS iff successful +/// @note MR should be selected using i_addr.bank with constants from dimmConsts.H fapi2::ReturnCode add_mrs_to_ccs_ddr4(const fapi2::Target& i_target_mba, const access_address i_addr, const uint32_t i_delay, diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_mc.C b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_mc.C index 7366c3f84..16f7af1cc 100644 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_mc.C +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_draminit_mc.C @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,10 @@ extern "C" { FAPI_INF( "%s +++ Setting up adr inversion for port 1 +++", mss::c_str(i_target)); FAPI_TRY(mss_enable_addr_inversion(l_mba), "---Error During ADR Inversion"); + //Step Two.3: Apply row repairs on each MBA's DIMM + FAPI_INF( "%s +++ Applying sPPR row repairs +++", mss::c_str(i_target)); + FAPI_TRY(p9c_mss_deploy_row_repairs(l_mba), "---Error During Row Reapirs"); + // Step Three: Enable Refresh FAPI_INF( "%s +++ Enabling Refresh +++", mss::c_str(i_target)); FAPI_TRY(fapi2::getScom(l_mba, CEN_MBA_MBAREF0Q, l_mba01_ref0q_data_buffer_64)); diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_funcs.H b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_funcs.H index 454cdb8fc..d53e72730 100755 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_funcs.H +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_funcs.H @@ -38,6 +38,7 @@ #include /// @brief Struct to contain DRAM address bits +/// @note For DDR4, bank should be set to {BA0, BA1, BG0, BG1} struct access_address { uint32_t row_addr; diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.C b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.C index ed4841acb..64e67f33f 100644 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.C +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.C @@ -26,8 +26,12 @@ #include #include #include +#include #include #include +#include +#include +#include using namespace fapi2; @@ -38,6 +42,7 @@ extern "C" /// @param[in] i_port port for repair /// @param[in] i_mrank master rank of address to repair /// @param[in] i_srank slave rank of address to repair + /// @param[in] i_bg bank group bits of address to repair /// @param[in] i_bank bank bits of address to repair /// @param[in] i_row row bits of address to repair /// @param[in] i_dram_bitmap bitmap of DRAMs selected for repair (b'1 to repair, b'0 to not repair) @@ -46,6 +51,7 @@ extern "C" const uint8_t i_port, const uint8_t i_mrank, const uint8_t i_srank, + const uint8_t i_bg, const uint8_t i_bank, const uint32_t i_row, const uint32_t i_dram_bitmap) @@ -57,10 +63,6 @@ extern "C" constexpr uint32_t DISABLE_PPR = 0; constexpr uint8_t TMOD = 24; - // This is the value to shift the input DRAM position to the last 20 bits of l_write_pattern - constexpr uint8_t DRAM_START_BIT = 44; - constexpr uint8_t DRAM_LEN = 64 - DRAM_START_BIT; - const std::vector MR0_SHADOW_REGS = { CEN_MBA_DDRPHY_PC_MR0_PRI_RP0_P0, @@ -86,7 +88,8 @@ extern "C" {0x03FF, 0, i_mrank, i_srank, MRS0_BA, i_port}, }; - access_address l_addr = {i_row, 0, i_mrank, i_srank, i_bank, i_port}; + const uint8_t l_bg_bank = (i_bank << 2) | i_bg; + access_address l_addr = {i_row, 0, i_mrank, i_srank, l_bg_bank, i_port}; fapi2::buffer l_row; fapi2::buffer l_bank; fapi2::buffer l_saved_mr0; @@ -236,4 +239,384 @@ extern "C" return fapi2::current_err; } + /// @brief Clear a row repair entry from the VPD data + /// @param[in] i_rank master rank + /// @param[in,out] io_row_repair_data data for this DIMM/rank from the VPD + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode clear_row_repair_entry(const uint8_t i_rank, + uint8_t (&io_row_repair_data)[MAX_RANKS_PER_DIMM][ROW_REPAIR_BYTES_PER_RANK]) + { + constexpr uint8_t CLEAR_REPAIR_VALID_IN_BYTE = 0xFE; + + FAPI_ASSERT(i_rank < MAX_RANKS_PER_DIMM, + fapi2::CEN_RANK_OUT_OF_BOUNDS(). + set_RANK(i_rank), + "Rank %d supplied to clear_row_repair_entry is out of bounds", + i_rank); + + // Clear the valid bit in the entry for this DIMM/rank and write it back + io_row_repair_data[i_rank][ROW_REPAIR_BYTES_PER_RANK - 1] &= CLEAR_REPAIR_VALID_IN_BYTE; + + return fapi2::FAPI2_RC_SUCCESS; + + fapi_try_exit: + return fapi2::current_err; + } + + /// @brief Decode a row repair entry from an encoded buffer + /// @param[in] i_repair row repair data buffer + /// @param[out] o_dram DRAM position + /// @param[out] o_srank slave rank + /// @param[out] o_bg bank group + /// @param[out] o_bank bank address + /// @param[out] o_row row address + /// @return true if the repair request is valid, false otherwise + bool valid_row_repair_entry( const fapi2::buffer i_repair, + uint8_t& o_dram, + uint8_t& o_srank, + uint8_t& o_bg, + uint8_t& o_bank, + uint32_t& o_row ) + { + i_repair.extractToRight(o_dram); + i_repair.extractToRight(o_srank); + i_repair.extractToRight(o_bg); + i_repair.extractToRight(o_bank); + i_repair.extractToRight(o_row); + return i_repair.getBit(); + } + + /// @brief Build a table of PPR row repairs from attribute data for a given DIMM + /// @param[in] i_target DIMM target + /// @param[in] i_dram_width the DRAM width + /// @param[in] i_row_repair_data array of row repair attribute values for the DIMM + /// @param[out] o_repairs_per_dimm array of row repair data buffers + /// @param[in,out] io_dram_bad_in_ranks array of how many ranks in which each DRAM was found to need a repair + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode build_row_repair_table(const fapi2::Target& i_target, + const uint8_t i_dram_width, + const uint8_t i_row_repair_data[MAX_RANKS_PER_DIMM][ROW_REPAIR_BYTES_PER_RANK], + std::vector>& o_repairs_per_dimm, + uint8_t io_dram_bad_in_ranks[MC_MAX_DRAMS_PER_RANK_X4]) + { + const uint8_t l_num_dram = (i_dram_width == fapi2::ENUM_ATTR_EFF_DRAM_WIDTH_X8) ? + MAX_DRAMS_PER_RANK_X8 : + (MC_MAX_DRAMS_PER_RANK_X4); + + o_repairs_per_dimm.clear(); + + for (uint8_t l_rank = 0; l_rank < MAX_RANKS_PER_DIMM; ++l_rank) + { + fapi2::buffer l_row_repair_entry; + + // Convert each entry from an array of bytes into a fapi2::buffer + for (uint8_t l_byte = 0; l_byte < ROW_REPAIR_BYTES_PER_RANK; ++l_byte) + { + FAPI_TRY(l_row_repair_entry.insertFromRight(i_row_repair_data[l_rank][l_byte], + l_byte * BITS_PER_BYTE, + BITS_PER_BYTE)); + } + + // Insert row repair request into list (valid or not, so we can index by DIMM rank) + o_repairs_per_dimm.push_back(l_row_repair_entry); + + uint8_t l_dram = 0; + uint8_t l_srank = 0; + uint8_t l_bg = 0; + uint8_t l_bank = 0; + uint32_t l_row = 0; + + if (valid_row_repair_entry(l_row_repair_entry, l_dram, l_srank, l_bg, l_bank, l_row)) + { + FAPI_INF("Found valid row repair request in VPD for DIMM %s, DRAM %d, mrank %d, srank %d, bg %d, bank %d, row 0x%05x", + mss::c_str(i_target), l_dram, l_rank, l_srank, l_bg, l_bank, l_row); + + // Do some sanity checking here + FAPI_ASSERT(l_dram < l_num_dram, + fapi2::CEN_ROW_REPAIR_ENTRY_OUT_OF_BOUNDS(). + set_DIMM_TARGET(i_target). + set_DRAM(l_dram). + set_MRANK(l_rank). + set_SRANK(l_srank). + set_BANK_GROUP(l_bg). + set_BANK(l_bank). + set_ROW(l_row), + "%s VPD contained out of bounds row repair entry: DRAM: %d mrank %d srank %d bg %d bank %d row 0x%05x", + mss::c_str(i_target), l_dram, l_rank, l_srank, l_bg, l_bank, l_row); + + // Add this rank to the total number of ranks this DRAM appears in + ++io_dram_bad_in_ranks[l_dram]; + } + } + + fapi_try_exit: + return fapi2::current_err; + } + + /// @brief Clear the corresponding bad_bits after a row repair operation + /// @param[in] i_dram_width the DRAM width + /// @param[in] i_dram the DRAM index + /// @param[in,out] io_bad_bits array bad bits data from VPD + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode clear_bad_dq_for_row_repair(const uint8_t i_dram_width, + const uint8_t i_dram, + uint8_t (&io_bad_bits)[DIMM_DQ_RANK_BITMAP_SIZE]) + { + // The DRAM index in ATTR_ROW_REPAIR_DATA is relative to Centaur perspective. + // The bad_bits attribute is as well, so we can just index into the bad bits array + // using the DRAM index + const uint8_t l_byte = (i_dram_width == fapi2::ENUM_ATTR_EFF_DRAM_WIDTH_X8) ? + i_dram : + i_dram / MAX_NIBBLES_PER_BYTE; + uint8_t l_mask = 0; + + if (i_dram_width == fapi2::ENUM_ATTR_EFF_DRAM_WIDTH_X4) + { + l_mask = (i_dram % MAX_NIBBLES_PER_BYTE == 0) ? 0x0F : 0xF0; + } + + // Protect our array index + FAPI_ASSERT(l_byte < DIMM_DQ_RANK_BITMAP_SIZE, + fapi2::CEN_DRAM_INDEX_OUT_OF_BOUNDS(). + set_DRAM(i_dram), + "DRAM index %d supplied to clear_bad_dq_for_row_repair is out of bounds", + i_dram); + + io_bad_bits[l_byte] &= l_mask; + + return fapi2::FAPI2_RC_SUCCESS; + + fapi_try_exit: + return fapi2::current_err; + } + + /// + /// @brief Create an error log and return with a good error code if a valid row repair is found + /// @param[in] i_target the DIMM target + /// @param[in] i_rank the master rank + /// @return successful error code + /// + fapi2::ReturnCode repairs_disabled_error_helper(const fapi2::Target& i_target, + const uint8_t i_rank, + const fapi2::buffer i_repair) + { + uint8_t l_dram = 0; + uint8_t l_srank = 0; + uint8_t l_bg = 0; + uint8_t l_bank = 0; + uint32_t l_row = 0; + + FAPI_ASSERT(!valid_row_repair_entry(i_repair, l_dram, l_srank, l_bg, l_bank, l_row), + fapi2::CEN_ROW_REPAIR_WITH_MNFG_REPAIRS_DISABLED(). + set_DIMM_TARGET(i_target). + set_RANK(i_rank), + "%s Row repair valid for rank %d but DRAM repairs are disabled in MNFG flags", + mss::c_str(i_target), i_rank); + return fapi2::FAPI2_RC_SUCCESS; + + fapi_try_exit: + // We've found a valid row repair - log it as predictive, so we get callouts in MFG test but don't fail out + fapi2::logError(fapi2::current_err, fapi2::FAPI2_ERRL_SEV_PREDICTIVE); + fapi2::current_err = fapi2::FAPI2_RC_SUCCESS; + return fapi2::FAPI2_RC_SUCCESS; + } + + /// @brief Deploy enough PPR row repairs to test all spare rows + /// @param[in] i_target_mba mba target + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode p9c_mss_activate_all_spare_rows(const fapi2::Target& i_target_mba) + { + uint8_t l_ranks_configed[MAX_PORTS_PER_MBA][MAX_DIMM_PER_PORT] = {0}; + uint8_t num_mranks[MAX_PORTS_PER_MBA][MAX_DIMM_PER_PORT] = {0}; + uint8_t num_ranks[MAX_PORTS_PER_MBA][MAX_DIMM_PER_PORT] = {0}; + + FAPI_INF("%s Deploying row repairs to test all spare rows", mss::c_str(i_target_mba)); + + for (const auto& l_dimm : mss::find_targets(i_target_mba)) + { + uint8_t l_port = 0; + uint8_t l_dimm_index = 0; + uint8_t l_num_sranks = 0; + fapi2::buffer l_dram_bitmap; + + // Set all DRAM select bits so we get repairs on all DRAMs + l_dram_bitmap.setBit(); + + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_MBA_PORT, l_dimm, l_port)); + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_MBA_DIMM, l_dimm, l_dimm_index)); + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_EFF_DIMM_RANKS_CONFIGED, i_target_mba, l_ranks_configed)); + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_EFF_NUM_MASTER_RANKS_PER_DIMM, i_target_mba, num_mranks)); + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_EFF_NUM_RANKS_PER_DIMM, i_target_mba, num_ranks)); + + // The number of slave ranks per DIMM is simply the number of total ranks divided by the number of master ranks + l_num_sranks = num_ranks[l_port][l_dimm_index] / num_mranks[l_port][l_dimm_index]; + + for (uint8_t l_mrank = 0; l_mrank < MAX_RANKS_PER_DIMM; ++l_mrank) + { + if (l_ranks_configed[l_port][l_dimm_index] & (0x80 >> l_mrank)) + { + for (uint8_t l_srank = 0; l_srank < l_num_sranks; ++l_srank) + { + uint8_t l_port_rank = 0; + // Note: setting row = rank so we don't use row0 for every repair + uint32_t l_row = l_mrank; + // Note: DIMM can only support one repair per BG, so we use BG=0 and BA=0 + uint8_t l_bg = 0; + uint8_t l_bank = 0; + + l_port_rank = (l_dimm_index * MAX_RANKS_PER_DIMM) + l_mrank; + + FAPI_TRY(p9c_mss_row_repair(i_target_mba, l_port, l_port_rank, l_srank, l_bg, l_bank, l_row, l_dram_bitmap)); + } + } + } + } + + fapi_try_exit: + return fapi2::current_err; + } + + /// @brief Deploy PPR row repairs, if supported, according to VPD attributes + /// @param[in] i_target_mba mba target + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode p9c_mss_deploy_row_repairs(const fapi2::Target& i_target_mba) + { + bool l_sppr_supported = true; + uint64_t l_mnfg_flags = 0; + uint8_t l_dram_width = 0; + uint8_t l_dram_bad_in_ranks[MC_MAX_DRAMS_PER_RANK_X4] = {0}; + uint8_t l_ranks_configed[MAX_PORTS_PER_MBA][MAX_DIMM_PER_PORT] = {0}; + uint8_t l_dram = 0; + uint8_t l_srank = 0; + uint8_t l_bg = 0; + uint8_t l_bank = 0; + uint32_t l_row = 0; + + // This table contains a row repair entry for each DIMM/mrank combination + std::map, std::vector>> l_row_repairs; + + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_MNFG_FLAGS, fapi2::Target(), l_mnfg_flags)); + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_EFF_DIMM_RANKS_CONFIGED, i_target_mba, l_ranks_configed)); + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_EFF_DRAM_WIDTH, i_target_mba, l_dram_width)); + + // If row repairs are not supported, we're done + for (const auto& l_dimm : mss::find_targets(i_target_mba)) + { + FAPI_TRY(is_sPPR_supported(l_dimm, l_sppr_supported)); + + if (!l_sppr_supported) + { + FAPI_INF("%s Skipping row repair deployment since it's not supported in the MRW", mss::c_str(i_target_mba)); + return fapi2::FAPI2_RC_SUCCESS; + } + } + + // If mnfg flag is set to test all spare rows, we need to do row repair on all dimm/ranks/DRAMs + if (l_mnfg_flags & fapi2::ENUM_ATTR_MNFG_FLAGS_MNFG_TEST_ALL_SPARE_DRAM_ROWS) + { + return p9c_mss_activate_all_spare_rows(i_target_mba); + } + + // Get row repair data from attribute and build table + for (const auto& l_dimm : mss::find_targets(i_target_mba)) + { + uint8_t l_row_repair_data[MAX_RANKS_PER_DIMM][ROW_REPAIR_BYTES_PER_RANK] = {0}; + std::vector> l_repairs_per_dimm; + + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_ROW_REPAIR_DATA, l_dimm, l_row_repair_data)); + + FAPI_TRY(build_row_repair_table(l_dimm, l_dram_width, l_row_repair_data, l_repairs_per_dimm, l_dram_bad_in_ranks)); + l_row_repairs.insert(std::make_pair(l_dimm, l_repairs_per_dimm)); + } + + // If DRAM repairs are disabled (mnfg flag), we're done (but need to callout DIMM if it has row repairs in VPD) + if (l_mnfg_flags & fapi2::ENUM_ATTR_MNFG_FLAGS_MNFG_DISABLE_DRAM_REPAIRS) + { + FAPI_INF("%s DRAM repairs are disabled, so skipping row repair deployment", mss::c_str(i_target_mba)); + + for (const auto l_pair : l_row_repairs) + { + const auto& l_dimm = l_pair.first; + const auto& l_repairs = l_pair.second; + + for (uint8_t l_rank = 0; l_rank < MAX_RANKS_PER_DIMM; ++l_rank) + { + // If we have a valid repair, call out this DIMM + FAPI_TRY(repairs_disabled_error_helper(l_dimm, l_rank, l_repairs[l_rank])); + } + } + + return fapi2::FAPI2_RC_SUCCESS; + } + + // Iterate through DRAM repairs structure + for (const auto l_pair : l_row_repairs) + { + const auto& l_dimm = l_pair.first; + const auto& l_repairs = l_pair.second; + uint8_t l_port = 0; + uint8_t l_total_ranks_on_port = 0; + uint8_t l_row_repair_data[MAX_RANKS_PER_DIMM][ROW_REPAIR_BYTES_PER_RANK] = {0}; + + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_MBA_PORT, l_dimm, l_port)); + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_ROW_REPAIR_DATA, l_dimm, l_row_repair_data)); + + for (uint8_t l_dimm_index = 0; l_dimm_index < MAX_DIMM_PER_PORT; ++l_dimm_index) + { + l_total_ranks_on_port += l_ranks_configed[l_port][l_dimm_index]; + } + + for (uint8_t l_rank = 0; l_rank < MAX_RANKS_PER_DIMM; ++l_rank) + { + if (valid_row_repair_entry(l_repairs[l_rank], l_dram, l_srank, l_bg, l_bank, l_row)) + { + // If a DRAM position is marked bad in VPD for all ranks, skip row repair and clear row repair entry from VPD + // as this means the DRAM position has not been calibrated during draminit_training (Centaur workaround) + if (l_dram_bad_in_ranks[l_dram] == l_total_ranks_on_port) + { + FAPI_INF("%s DRAM position %d is bad in all ranks. Skipping row repairs for this DRAM.", + mss::c_str(i_target_mba), l_dram); + + FAPI_TRY(clear_row_repair_entry(l_rank, l_row_repair_data)); + + continue; + } + + // Deploy row repair and clear bad DQs + uint8_t l_dimm_index = 0; + uint8_t l_port_rank = 0; + uint8_t l_bad_bits[DIMM_DQ_RANK_BITMAP_SIZE] = {0}; // 10 byte array of bad bits + fapi2::buffer l_dram_bitmap; + + FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_CEN_MBA_DIMM, l_dimm, l_dimm_index)); + l_port_rank = (l_dimm_index * MAX_RANKS_PER_DIMM) + l_rank; + + FAPI_TRY(l_dram_bitmap.setBit(DRAM_START_BIT + l_dram)); + + FAPI_INF("Deploying row repair on DIMM %s, DRAM %d, mrank %d, srank %d, bg %d, bank %d, row 0x%05x", + mss::c_str(l_dimm), l_dram, l_rank, l_srank, l_bg, l_bank, l_row); + FAPI_TRY(p9c_mss_row_repair(i_target_mba, l_port, l_port_rank, l_srank, l_bg, l_bank, l_row, l_dram_bitmap)); + + // Clear bad DQ bits for this port, DIMM, rank that will be fixed by this row repair + FAPI_INF("Updating bad bits on DIMM %s, DRAM %d, mrank %d, srank %d, bg %d, bank %d, row 0x%05x", + mss::c_str(l_dimm), l_dram, l_rank, l_srank, l_bg, l_bank, l_row); + + FAPI_TRY(dimmGetBadDqBitmap(i_target_mba, l_port, l_dimm_index, l_rank, l_bad_bits), + "Error from dimmGetBadDqBitmap on %s.", mss::c_str(i_target_mba)); + + FAPI_TRY(clear_bad_dq_for_row_repair(l_dram_width, l_dram, l_bad_bits)); + + FAPI_TRY(dimmSetBadDqBitmap(i_target_mba, l_port, l_dimm_index, l_rank, l_bad_bits), + "Error from dimmGetBadDqBitmap on %s.", mss::c_str(i_target_mba)); + } + } + + // Set the row repair attribute with any changes + FAPI_TRY(FAPI_ATTR_SET(fapi2::ATTR_ROW_REPAIR_DATA, l_dimm, l_row_repair_data)); + } + + fapi_try_exit: + return fapi2::current_err; + } + } diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.H b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.H index aa113d762..22fe5b5b2 100644 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.H +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.H @@ -26,15 +26,36 @@ #define __P9C_MSS_ROW_REPAIR__ #include +#include typedef fapi2::ReturnCode (*p9c_mss_row_repair_FP_t)(const fapi2::Target& i_target_mba, const uint8_t i_port, const uint8_t i_mrank, const uint8_t i_srank, + const uint8_t i_bg, const uint8_t i_bank, const uint32_t i_row, const uint32_t i_dram_bitmap); +// These consts describe the organization of ATTR_ROW_REPAIR_DATA +constexpr size_t ROW_REPAIR_BYTES_PER_RANK = 4; +constexpr size_t DRAM_POS = 0; +constexpr size_t DRAM_POS_LEN = 5; +constexpr size_t SRANK = 5; +constexpr size_t SRANK_LEN = 3; +constexpr size_t BANK_GROUP = 8; +constexpr size_t BANK_GROUP_LEN = 2; +constexpr size_t BANK = 10; +constexpr size_t BANK_LEN = 3; +constexpr size_t ROW_ADDR = 13; +constexpr size_t ROW_ADDR_LEN = 18; +constexpr size_t REPAIR_VALID = 31; +constexpr uint8_t MC_MAX_DRAMS_PER_RANK_X4 = MAX_DRAMS_PER_RANK_X4 + 1; + +// This is the value to shift the input DRAM position to the last 20 bits of l_write_pattern +constexpr uint8_t DRAM_START_BIT = 44; +constexpr uint8_t DRAM_LEN = 64 - DRAM_START_BIT; + extern "C" { @@ -43,6 +64,7 @@ extern "C" /// @param[in] i_port port for repair /// @param[in] i_mrank master rank of address to repair /// @param[in] i_srank slave rank of address to repair + /// @param[in] i_bg bank group bits of address to repair /// @param[in] i_bank bank bits of address to repair /// @param[in] i_row row bits of address to repair /// @param[in] i_dram_bitmap bitmap of DRAMs selected for repair (b'1 to repair, b'0 to not repair) @@ -51,10 +73,60 @@ extern "C" const uint8_t i_port, const uint8_t i_mrank, const uint8_t i_srank, + const uint8_t i_bg, const uint8_t i_bank, const uint32_t i_row, const uint32_t i_dram_bitmap); + + /// @brief Deploy PPR row repairs, if supported, according to VPD attributes + /// @param[in] i_target_mba mba target + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode p9c_mss_deploy_row_repairs(const fapi2::Target& i_target_mba); + + /// @brief Clear the corresponding bad_bits after a row repair operation + /// @param[in] i_dram_width the DRAM width + /// @param[in] i_dram the DRAM index + /// @param[in,out] io_bad_bits array bad bits data from VPD + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode clear_bad_dq_for_row_repair(const uint8_t i_dram_width, + const uint8_t i_dram, + uint8_t (&io_bad_bits)[DIMM_DQ_RANK_BITMAP_SIZE]); + + /// @brief Clear a row repair entry from the VPD data + /// @param[in] i_rank master rank + /// @param[in,out] io_row_repair_data data for this DIMM/rank from the VPD + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode clear_row_repair_entry(const uint8_t i_rank, + uint8_t (&io_row_repair_data)[MAX_RANKS_PER_DIMM][ROW_REPAIR_BYTES_PER_RANK]); + + /// @brief Decode a row repair entry from an encoded buffer + /// @param[in] i_repair row repair data buffer + /// @param[out] o_dram DRAM position + /// @param[out] o_srank slave rank + /// @param[out] o_bg bank group + /// @param[out] o_bank bank address + /// @param[out] o_row row address + /// @return true if the repair request is valid, false otherwise + bool valid_row_repair_entry( const fapi2::buffer i_repair, + uint8_t& o_dram, + uint8_t& o_srank, + uint8_t& o_bg, + uint8_t& o_bank, + uint32_t& o_row ); + + /// @brief Build a table of PPR row repairs from attribute data for a given DIMM + /// @param[in] i_target DIMM target + /// @param[in] i_dram_width the DRAM width + /// @param[in] i_row_repair_data array of row repair attribute values for the DIMM + /// @param[out] o_repairs_per_dimm array of row repair data buffers + /// @param[in,out] io_dram_bad_in_ranks array of how many ranks in which each DRAM was found to need a repair + /// @return FAPI2_RC_SUCCESS iff successful + fapi2::ReturnCode build_row_repair_table(const fapi2::Target& i_target, + const uint8_t i_dram_width, + const uint8_t i_row_repair_data[MAX_RANKS_PER_DIMM][ROW_REPAIR_BYTES_PER_RANK], + std::vector>& o_repairs_per_dimm, + uint8_t io_dram_bad_in_ranks[MC_MAX_DRAMS_PER_RANK_X4]); } #endif diff --git a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.mk b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.mk index b291f595b..a8d501d3b 100644 --- a/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.mk +++ b/src/import/chips/centaur/procedures/hwp/memory/p9c_mss_row_repair.mk @@ -28,4 +28,6 @@ PROCEDURE=p9c_mss_row_repair $(eval $(call ADD_MEMORY_INCDIRS,$(PROCEDURE))) lib${PROCEDURE}_DEPLIBS+=p9c_mss_ddr4_funcs lib${PROCEDURE}_DEPLIBS+=p9c_mss_funcs +lib${PROCEDURE}_DEPLIBS+=p9c_dimmBadDqBitmapFuncs +lib${PROCEDURE}_DEPLIBS+=p9c_mss_rowRepairFuncs $(call BUILD_PROCEDURE) diff --git a/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_errors.xml b/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_errors.xml index 61ebbb840..c41c0b970 100644 --- a/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_errors.xml +++ b/src/import/chips/centaur/procedures/xml/error_info/p9c_memory_errors.xml @@ -378,6 +378,61 @@ + + RC_CEN_ROW_REPAIR_WITH_MNFG_REPAIRS_DISABLED + Row repairs were requested but DRAM repairs are disabled in MNFG flags + DIMM_TARGET + RANK + + DIMM_TARGET + HIGH + + + CODE + LOW + + + + + RC_CEN_RANK_OUT_OF_BOUNDS + Rank supplied to clear_row_repair_entry is out of bounds + RANK + + CODE + HIGH + + + + + RC_CEN_DRAM_INDEX_OUT_OF_BOUNDS + DRAM index supplied to clear_bad_dq_for_row_repair is out of bounds + DRAM + + CODE + HIGH + + + + + RC_CEN_ROW_REPAIR_ENTRY_OUT_OF_BOUNDS + VPD contained out of bounds row repair entry + DIMM_TARGET + DRAM + MRANK + SRANK + BANK_GROUP + BANK + ROW + + DIMM_TARGET + HIGH + + + CODE + LOW + + + diff --git a/src/import/hwpf/fapi2/xml/attribute_info/system_attributes.xml b/src/import/hwpf/fapi2/xml/attribute_info/system_attributes.xml index 7cec7fdb9..90559adb8 100644 --- a/src/import/hwpf/fapi2/xml/attribute_info/system_attributes.xml +++ b/src/import/hwpf/fapi2/xml/attribute_info/system_attributes.xml @@ -5,7 +5,7 @@ - + @@ -58,6 +58,7 @@ MNFG_NO_FLAG = 0x0000000000000000, MNFG_THRESHOLDS = 0x0000000000000001, + MNFG_TEST_ALL_SPARE_DRAM_ROWS = 0x0000000000000040, MNFG_DISABLE_DRAM_REPAIRS = 0x0000000000000080, MNFG_ENABLE_STANDARD_PATTERN_TEST = 0x0000000000000200, MNFG_DISABLE_FABRIC_eREPAIR = 0x0000000000000800, -- cgit v1.2.1