/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* $Source: chips/p9/procedures/hwp/memory/lib/phy/cal_timers.H $ */ /* */ /* IBM CONFIDENTIAL */ /* */ /* EKB Project */ /* */ /* COPYRIGHT 2015,2016 */ /* [+] International Business Machines Corp. */ /* */ /* */ /* The source code for this program is not published or otherwise */ /* divested of its trade secrets, irrespective of what has been */ /* deposited with the U.S. Copyright Office. */ /* */ /* IBM_PROLOG_END_TAG */ /// /// @file cal_timers.H /// @brief Subroutines to calculate the duration of training operations /// // *HWP HWP Owner: Brian Silver // *HWP HWP Backup: Andre Marin // *HWP Team: Memory // *HWP Level: 2 // *HWP Consumed by: FSP:HB #ifndef _MSS_CAL_TIMERS_H_ #define _MSS_CAL_TIMERS_H_ #include #include #include #include namespace mss { /// /// @brief Return the cycles taken for write leveling on a single rank /// @tparam T the target type used to get attributes /// @param[in] i_target used to get attributes /// @return uint64_t, the cycles taken for write leveling on a single rank /// template uint64_t wr_lvl_cycles(const fapi2::Target& i_target ) { const uint64_t TWLO_TWLOE = mss::twlo_twloe(i_target); // This step runs for approximately (80 + TWLO_TWLOE) x NUM_VALID_SAMPLES x (384/(BIG_STEP + 1) + // (2 x (BIG_STEP + 1))/(SMALL_STEP + 1)) + 20 memory clock cycles per rank. const uint64_t l_wr_lvl_cycles = (80 + TWLO_TWLOE) * WR_LVL_NUM_VALID_SAMPLES * (384 / (WR_LVL_BIG_STEP + 1) + (2 * (WR_LVL_BIG_STEP + 1)) / (WR_LVL_SMALL_STEP + 1)) + 20; FAPI_DBG("wr_lvl_cycles: %llu(%lluns) (%llu, %llu, %llu, %llu)", l_wr_lvl_cycles, mss::cycles_to_ns(i_target, l_wr_lvl_cycles), TWLO_TWLOE, WR_LVL_NUM_VALID_SAMPLES, WR_LVL_BIG_STEP, WR_LVL_SMALL_STEP); return l_wr_lvl_cycles; } /// /// @brief Return the cycles taken for DQS align on a single rank /// @tparam T the target type used to get attributes /// @param[in] i_target used to get attributes /// @return uint64_t, the cycles taken for DQS align on a single rank /// template uint64_t dqs_align_cycles(const fapi2::Target& i_target ) { // This step runs for approximately 6 x 600 x 4 DRAM clocks per rank pair. const uint64_t l_dqs_align_cycles = 6 * 600 * 4; FAPI_DBG("dqs_align_cycles: %llu(%lluns)", l_dqs_align_cycles, mss::cycles_to_ns(i_target, l_dqs_align_cycles)); return l_dqs_align_cycles; } /// /// @brief Return the cycles taken for RD clock align on a single rank /// @tparam T the target type used to get attributes /// @param[in] i_target used to get attributes /// @return uint64_t, the cycles taken for RD clock align on a single rank /// template uint64_t rdclk_align_cycles(const fapi2::Target& i_target ) { // This step runs for approximately 24 x ((1024/COARSE_CAL_STEP_SIZE + 4 x COARSE_CAL_STEP_SIZE) x 4 + 32) DRAM // clocks per rank pair const uint64_t l_rdclk_align_cycles = 24 * ((1024 / COARSE_CAL_STEP_SIZE + 4 * COARSE_CAL_STEP_SIZE) * 4 + 32); FAPI_DBG("rdclk_align_cycles: %llu(%lluns) (%llu)", l_rdclk_align_cycles, mss::cycles_to_ns(i_target, l_rdclk_align_cycles), COARSE_CAL_STEP_SIZE); return l_rdclk_align_cycles; } /// /// @brief Return the cycles taken for read centering on a single rank /// @tparam T the target type used to get attributes /// @param[in] i_target used to get attributes /// @return uint64_t, the cycles taken for read centering on a single rank /// template uint64_t read_ctr_cycles(const fapi2::Target& i_target ) { // This step runs for approximately 6 x (512/COARSE_CAL_STEP_SIZE + 4 x (COARSE_CAL_STEP_SIZE + // 4 x CONSEQ_PASS)) x 24 DRAM clocks per rank pair. const uint64_t l_read_ctr_cycles = 6 * (512 / COARSE_CAL_STEP_SIZE + 4 * (COARSE_CAL_STEP_SIZE + 4 * CONSEQ_PASS)) * 24; FAPI_DBG("read_ctr_cycles %llu(%lluns) (%llu, %llu)", l_read_ctr_cycles, mss::cycles_to_ns(i_target, l_read_ctr_cycles), COARSE_CAL_STEP_SIZE, CONSEQ_PASS); return l_read_ctr_cycles; } /// /// @brief Return the cycles taken for write centering on a single rank /// @tparam T the target type used to get attributes /// @param[in] i_target used to get attributes /// @return uint64_t, the cycles taken for write centering on a single rank /// template uint64_t write_ctr_cycles(const fapi2::Target& i_target ) { // 1000 + (NUM_VALID_SAMPLES * (FW_WR_RD + FW_RD_WR + 16) * // (1024/(SMALL_STEP +1) + 128/(BIG_STEP +1)) + 2 * (BIG_STEP+1)/(SMALL_STEP+1)) x 24 DRAM // clocks per rank pair. uint64_t l_write_ctr_cycles = 1000 + (WR_LVL_NUM_VALID_SAMPLES * (WR_CNTR_FW_WR_RD + WR_CNTR_FW_RD_WR + 16) * (1024 / (WR_LVL_SMALL_STEP + 1) + 128 / (WR_LVL_BIG_STEP + 1)) + 2 * (WR_LVL_BIG_STEP + 1) / (WR_LVL_SMALL_STEP + 1)) * 24; FAPI_DBG("write_ctr_cycles: %lu(%luns) (%u, %u, %u, %u, %u)", l_write_ctr_cycles, mss::cycles_to_ns(i_target, l_write_ctr_cycles), WR_LVL_NUM_VALID_SAMPLES, WR_CNTR_FW_WR_RD, WR_CNTR_FW_RD_WR, WR_LVL_BIG_STEP, WR_LVL_SMALL_STEP); return l_write_ctr_cycles; } /// /// @brief Return the cycles taken for coarse write on a single rank /// @tparam T the target type used to get attributes /// @param[in] i_target used to get attributes /// @return uint64_t, the cycles taken for coarse write on a single rank /// template uint64_t coarse_wr_cycles(const fapi2::Target& i_target ) { // The run length given here is the maximum run length for this calibration algorithm. // This step runs for approximately 40 DRAM clocks per rank pair. static const uint64_t l_coarse_wr_cycles = 40; FAPI_DBG("coarse_wr_cycles: %llu(%lluns)", l_coarse_wr_cycles, mss::cycles_to_ns(i_target, l_coarse_wr_cycles)); return l_coarse_wr_cycles; } /// /// @brief Return the cycles taken for coarse read on a single rank /// @tparam T the target type used to get attributes /// @param[in] i_target used to get attributes /// @return uint64_t, the cycles taken for coarse rd on a single rank /// template uint64_t coarse_rd_cycles(const fapi2::Target& i_target ) { // The run length given here is the maximum run length for this calibration algorithm. // This step runs for approximately 32 DRAM clocks per rank pair. static const uint64_t l_coarse_rd_cycles = 32; FAPI_DBG("coarse_rd_cycles: %llu(%lluns)", l_coarse_rd_cycles, mss::cycles_to_ns(i_target, l_coarse_rd_cycles)); return l_coarse_rd_cycles; } /// /// @brief Configure the polling intervals with the proper timeings based on the cal steps enabled /// @tparam T the fapi2::TargetType of the port /// @param[in] i_target the port target /// @param[in] i_poll the poll_parameters to update /// @param[in] i_cal_steps_enabled a fapi2::buffer representing the cal steps enabled /// @return FAPI2_RC_SUCCESS iff everything is OK /// template inline fapi2::ReturnCode cal_timer_setup(const fapi2::Target& i_target, poll_parameters& i_poll, const fapi2::buffer& i_cal_steps_enabled) { // First, calculate the total number of cycles this cal should take if everything // runs to completion uint64_t l_total_cycles = i_cal_steps_enabled.getBit() ? wr_lvl_cycles(i_target) : 0; l_total_cycles += i_cal_steps_enabled.getBit() ? dqs_align_cycles(i_target) : 0; l_total_cycles += i_cal_steps_enabled.getBit() ? rdclk_align_cycles(i_target) : 0; l_total_cycles += i_cal_steps_enabled.getBit() ? read_ctr_cycles(i_target) : 0; l_total_cycles += i_cal_steps_enabled.getBit() ? write_ctr_cycles(i_target) : 0; l_total_cycles += i_cal_steps_enabled.getBit() ? coarse_wr_cycles(i_target) : 0; l_total_cycles += i_cal_steps_enabled.getBit() ? coarse_rd_cycles(i_target) : 0; // Now we have to decide if we're going to abort on an error or keep going. If we keep going, // then we want our initial delay to be the expected time to completion - we don't have much // of a choice. If we abort on error then we want to have a small initial delay, and poll more // times with longer durations between the polling. #define THRASH_CCS_IP_IN_SIM 1 #ifdef THRASH_CCS_IP_IN_SIM i_poll.iv_initial_delay = mss::cycles_to_ns(i_target, 1); i_poll.iv_initial_sim_delay = mss::cycles_to_simcycles(1); i_poll.iv_delay = DELAY_1US; i_poll.iv_sim_delay = mss::cycles_to_simcycles(mss::ns_to_cycles(i_target, DELAY_1US)); // Round up to the cycles left after the initial delay int64_t l_ns_left = std::max(0L, (int64_t(mss::cycles_to_ns(i_target, l_total_cycles)) - int64_t(i_poll.iv_initial_delay))); i_poll.iv_poll_count = l_ns_left / i_poll.iv_delay; i_poll.iv_poll_count += l_ns_left % i_poll.iv_delay ? 0 : 1; // Don't let the poll count be 0 - that just makes for a bad day. i_poll.iv_poll_count = (i_poll.iv_poll_count == 0) ? 1 : i_poll.iv_poll_count; #else if (CAL_ABORT_ON_ERROR) { // We don't want to wait too long for the initial check, just some hueristics here i_poll.iv_initial_delay = mss::cycles_to_ns(i_target, l_total_cycles / 8); i_poll.iv_initial_sim_delay = mss::cycles_to_simcycles(l_total_cycles / 8); // Delay 1us between polls, and setup the poll count so // iv_initial_delay + (iv_delay * iv_poll_count) == l_total_cycles; i_poll.iv_delay = DELAY_1US; i_poll.iv_sim_delay = mss::cycles_to_simcycles(mss::ns_to_cycles(i_target, DELAY_1US)); // Round up to the cycles left after the initial delay int64_t l_ns_left = std::max(0L, (int64_t(mss::cycles_to_ns(i_target, l_total_cycles)) - int64_t(i_poll.iv_initial_delay))); i_poll.iv_poll_count = l_ns_left / i_poll.iv_delay; i_poll.iv_poll_count += l_ns_left % i_poll.iv_delay ? 0 : 1; // Don't let the poll count be 0 - that just makes for a bad day. i_poll.iv_poll_count = (i_poll.iv_poll_count == 0) ? 1 : i_poll.iv_poll_count; } else { i_poll.iv_initial_delay = mss::cycles_to_ns(i_target, l_total_cycles); i_poll.iv_initial_sim_delay = mss::cycles_to_simcycles(l_total_cycles); } #endif FAPI_INF("cal abort on error? %s. tc: %luc, id: %luns(%lusc), d: %lu(%lusc), pc: %lu", (CAL_ABORT_ON_ERROR ? "yup" : "nope"), l_total_cycles, i_poll.iv_initial_delay, i_poll.iv_initial_sim_delay, i_poll.iv_delay, i_poll.iv_sim_delay, i_poll.iv_poll_count); return fapi2::FAPI2_RC_SUCCESS; } } #endif