/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* $Source: src/import/generic/memory/lib/ccs/ccs.H $ */ /* */ /* OpenPOWER HostBoot Project */ /* */ /* Contributors Listed Below - COPYRIGHT 2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ /* You may obtain a copy of the License at */ /* */ /* http://www.apache.org/licenses/LICENSE-2.0 */ /* */ /* Unless required by applicable law or agreed to in writing, software */ /* distributed under the License is distributed on an "AS IS" BASIS, */ /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ /* implied. See the License for the specific language governing */ /* permissions and limitations under the License. */ /* */ /* IBM_PROLOG_END_TAG */ /// /// @file ccs.H /// @brief Run and manage the CCS engine /// // *HWP HWP Owner: Matthew Hickman // *HWP HWP Backup: Andre Marin // *HWP Team: Memory // *HWP Level: 3 // *HWP Consumed by: HB:FSP #ifndef _MSS_CCS_H_ #define _MSS_CCS_H_ #include #include #include #include #include #include #include #include namespace mss { static constexpr uint64_t CKE_HIGH = 0b1111; static constexpr uint64_t CKE_LOW = 0b0000; // CKE setup for rank 0-7 to support // Currently only support 0, 1, 4, 5 // Not supported ranks will always get 0 // For self_refresh_entry_command() static constexpr uint64_t CKE_ARY_SRE[] = { // 0, 1, 2, 3, 0b0111, 0b1011, 0, 0, // 4, 5, 6, 7 0b0111, 0b1011, 0, 0 }; // For self_refresh_exit_command() static constexpr uint64_t CKE_ARY_SRX[] = { // 0, 1, 2, 3, 0b1000, 0b0100, 0, 0, // 4, 5, 6, 7 0b1000, 0b0100, 0, 0 }; namespace ccs { enum rank_configuration { DUAL_DIRECT = 0, QUAD_ENCODED = 1, // Note: we don't include QUAD_DIRECT in here // That's because it uses 4 CS and is board wiring dependent // Not sure if it would use CS23 or CID01 for CS2/3 }; /// /// @class instruction_t /// @brief Class for ccs instructions /// @tparam T fapi2::TargetType representing the target of the CCS instructions /// @note A ccs instruction is data (array 0) and some control information (array 1)cc /// class instruction_t { private: using TT = ccsTraits; public: fapi2::buffer arr0; fapi2::buffer arr1; // The MCA indexed rank on which to operate. If this is invalid, all ranks will be disabled uint64_t iv_rank; // We want to have a switch to update rank or not. A user might want to setup CS in some weird way // In that case, they don't want us "fixing" their CS values // We'll default the rank to be updated - we want to send out CS properly bool iv_update_rank; /// /// @brief intstruction_t ctor /// @param[in] i_rank the rank this instruction is headed for /// @param[in] i_arr0 the initial value for arr0, defaults to 0 /// @param[in] i_arr1 the initial value for arr1, defaults to 0 /// @param[in] i_update_rank true if the rank should be updated before being sent, defaults to true /// instruction_t( const uint64_t i_rank = NO_CHIP_SELECT_ACTIVE, const fapi2::buffer i_arr0 = 0, const fapi2::buffer i_arr1 = 0, const bool i_update_rank = true): arr0(i_arr0), arr1(i_arr1), iv_rank(i_rank), iv_update_rank(i_update_rank) { // Skip setting up the rank if the user doesn't want us to if(iv_update_rank) { // Set the chip selects to be 1's (not active) // We'll fix these up before executing the instructions arr0.insertFromRight(0b11); arr0.insertFromRight(0b11); } } /// /// @brief Updates the rank based upon the passed in rank configuration encoding /// @param[in] i_target the port target for this instruction - for error logging /// @param[in] i_rank_config the rank configuration /// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok /// fapi2::ReturnCode configure_rank(const fapi2::Target& i_target, const rank_configuration i_rank_config ) { // If this instrunction is set to not update the rank, then don't update the rank if(!iv_update_rank) { return fapi2::FAPI2_RC_SUCCESS; } // Regardless of rank configurations, if we have NO_CHIP_SELECT_ACTIVE, deactivate all CS if(iv_rank == NO_CHIP_SELECT_ACTIVE) { arr0.insertFromRight(0b11); arr0.insertFromRight(0b11); return fapi2::FAPI2_RC_SUCCESS; } // First, check rank - we need to make sure that we have a valid rank FAPI_ASSERT(iv_rank < TT::CCS_MAX_MRANK_PER_PORT, fapi2::MSS_INVALID_RANK() .set_PORT_TARGET(i_target) .set_RANK(iv_rank) .set_FUNCTION(generic_ffdc_codes::CCS_INST_CONFIGURE_RANK), "%s rank out of bounds rank%u", mss::c_str(i_target), iv_rank); // Now the fun happens and we can deal with the actual encoding // If we're quad mode, setup the encoding accordingly if(i_rank_config == rank_configuration::QUAD_ENCODED) { // CS 0/1 are first, while CID0/1 are second // In quad enabled mode, CID acts as a "package select" // It selects R0/2 vs R1/3 // CS0 vs CS1 selects the low vs high rank in the package // CS0 will select rank 0/1 // CS1 will select rank 2/3 const auto l_dimm_rank = mss::index(iv_rank); const bool l_is_dimm0 = iv_rank < TT::CCS_MAX_RANK_PER_DIMM; constexpr uint64_t NON_DIMM_CS = 0b11; // Assigns the CS based upon which DIMM we're at const auto CS01 = l_is_dimm0 ? TT::CS_N[l_dimm_rank].first : NON_DIMM_CS; const auto CS23 = l_is_dimm0 ? NON_DIMM_CS : TT::CS_N[l_dimm_rank].first; // Setup that rank arr0.insertFromRight(CS01); arr0.insertFromRight(CS23); arr0.insertFromRight(TT::CS_N[l_dimm_rank].second); } // Otherwise, setup for dual-direct mode (our only other supported mode at the moment) else { const auto l_dimm_rank = mss::index(iv_rank); const bool l_is_dimm0 = iv_rank < TT::CCS_MAX_RANK_PER_DIMM; // Assigns the CS based upon which DIMM we're at const auto CS01 = l_is_dimm0 ? TT::CS_ND[l_dimm_rank].first : TT::CS_ND[l_dimm_rank].second; const auto CS23 = l_is_dimm0 ? TT::CS_ND[l_dimm_rank].second : TT::CS_ND[l_dimm_rank].first; // Setup that rank arr0.insertFromRight(CS01); arr0.insertFromRight(CS23); // Check that we don't have a rank out of bounds case here // We can only have that if // 1) we are DIMM1 // 2) our DIMM rank is greater than the maximum allowed number of ranks on DIMM1 // So, we pass always if we're DIMM0, or if our DIMM rank is less than the maximum number of DIMM's on rank 1 FAPI_ASSERT(l_dimm_rank < TT::CCS_MAX_RANKS_DIMM1 || l_is_dimm0, fapi2::MSS_INVALID_RANK() .set_PORT_TARGET(i_target) .set_RANK(iv_rank) .set_FUNCTION(generic_ffdc_codes::CCS_INST_CONFIGURE_RANK), "%s rank out of bounds rank%u", mss::c_str(i_target), iv_rank); } return fapi2::FAPI2_RC_SUCCESS; fapi_try_exit: return fapi2::current_err; } /// /// @brief Equals comparison operator /// @param[in] i_rhs - the instruction to compare to /// @return True if both instructions are equal /// inline bool operator==( const instruction_t& i_rhs ) const { return arr0 == i_rhs.arr0 && arr1 == i_rhs.arr1 && iv_rank == i_rhs.iv_rank && iv_update_rank == i_rhs.iv_update_rank; } }; /// /// @brief Determines our rank configuration type /// @param[in] i_target the MCA target on which to operate /// @param[out] o_rank_config the rank configuration /// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok /// inline fapi2::ReturnCode get_rank_config(const fapi2::Target& i_target, rank_configuration& o_rank_config) { typedef ccsTraits TT; constexpr uint8_t QUAD_RANK_ENABLE = 4; o_rank_config = rank_configuration::DUAL_DIRECT; uint8_t l_num_master_ranks[TT::CCS_MAX_DIMM_PER_PORT] = {}; FAPI_TRY(TT::get_rank_config_attr(i_target, l_num_master_ranks)); // We only need to check DIMM0 // Our number of ranks should be the same between DIMM's 0/1 // Check if we have the right number for encoded mode o_rank_config = l_num_master_ranks[0] == QUAD_RANK_ENABLE ? rank_configuration::QUAD_ENCODED : rank_configuration::DUAL_DIRECT; fapi_try_exit: return fapi2::current_err; } /// /// @brief Determines our rank configuration type across all ports /// @param[in] i_target the MCA target on which to operate /// @param[out] o_rank_config the rank configuration /// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok /// inline fapi2::ReturnCode get_rank_config(const fapi2::Target& i_target, std::vector& o_rank_config) { typedef ccsTraits TT; o_rank_config.clear(); // Create one per port, we then use relative indexing to get us the number we need o_rank_config = std::vector(TT::PORTS_PER_MC_TARGET); for(const auto& l_port : mss::find_targets(i_target)) { rank_configuration l_config; FAPI_TRY(get_rank_config(l_port, l_config)); o_rank_config[mss::relative_pos(l_port)] = l_config; } return fapi2::FAPI2_RC_SUCCESS; fapi_try_exit: return fapi2::current_err; } /// /// @brief A class representing a series of CCS instructions, and the /// CCS engine parameters associated with running the instructions /// @tparam T fapi2::TargetType representing the fapi2 target which /// @tparam P fapi2::TargetType representing the port /// contains the CCS engine class program { private: using TT = ccsTraits; public: // Setup our poll parameters so the CCS executer can see // whether to use the delays in the instruction stream or not program(): iv_poll(0, 0) {} // Vector of instructions std::vector< instruction_t > iv_instructions; poll_parameters iv_poll; // Vector of polling probes std::vector< poll_probe > iv_probes; }; /// /// @brief Common setup for all MRS/RCD instructions /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in,out] i_arr0 fapi2::buffer representing the ARR0 of the instruction /// static void mrs_rcd_helper( fapi2::buffer& i_arr0 ) { using TT = ccsTraits; // // Generic DDR4 MRS setup (RCD is an MRS) // // CKE is high Note: P8 set all 4 of these high - not sure if that's correct. BRS i_arr0.insertFromRight(CKE_HIGH); // ACT is high i_arr0.setBit(); // RAS, CAS, WE low i_arr0.clearBit() .template clearBit() .template clearBit(); } /// /// @brief Setup activate command instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the DIMM this instruction is headed for /// @param[in] i_rank the rank on this dimm /// inline instruction_t act_command( const uint64_t i_rank ) { using TT = ccsTraits; fapi2::buffer l_boilerplate_arr0; fapi2::buffer l_boilerplate_arr1; // Set all CKE to high l_boilerplate_arr0.insertFromRight(CKE_HIGH); // ACT is high l_boilerplate_arr0.clearBit(); // RAS low, CAS low, WE low l_boilerplate_arr0.clearBit() .template clearBit() .template clearBit(); // Just leaving the row addresses to all 0 for now // row, bg, ba set to 0 l_boilerplate_arr0.clearBit(); l_boilerplate_arr0.clearBit(); l_boilerplate_arr0.clearBit(); l_boilerplate_arr0.clearBit(); l_boilerplate_arr0.clearBit(); l_boilerplate_arr0.clearBit(); return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Create, initialize an RCD (RCW - JEDEC) CCS command /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the DIMM this instruction is headed for /// @param[in] i_turn_on_cke flag that states whether we want CKE on for this RCW (defaulted to true) /// @return the RCD CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t rcd_command( const fapi2::Target& i_target, const bool i_sim, const bool i_turn_on_cke = true) { using TT = ccsTraits; fapi2::buffer rcd_boilerplate_arr0; fapi2::buffer rcd_boilerplate_arr1; // // Generic DDR4 MRS setup (RCD is an MRS) // mrs_rcd_helper(rcd_boilerplate_arr0); // Not adding i_turn_on_cke in the mrs_rcd helper because we only need this // for RCWs and there is no need to complicate/change the MRS cmd API with // uneeded functionality. Little duplication, but this isolates the change. if( !i_sim ) { const uint64_t l_cke = i_turn_on_cke ? CKE_HIGH : CKE_LOW; rcd_boilerplate_arr0.insertFromRight(l_cke); } // // RCD setup // // DDR4: Set BG1 to 0 during an MRS. // BG0, BA1:BA0 to 0b111 selects RCW (aka MR7). rcd_boilerplate_arr0.clearBit() .template insertFromRight(0b11) .template setBit(); // RCD always goes to the 0th rank on the DIMM; either 0 or 4. return instruction_t((mss::index(i_target) == 0) ? 0 : 4, rcd_boilerplate_arr0, rcd_boilerplate_arr1); } /// /// @brief Create, initialize an MRS CCS command /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the DIMM this instruction is headed for /// @param[in] i_rank the rank on this dimm /// @param[in] i_mrs the specific MRS /// @return the MRS CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t mrs_command ( const uint64_t i_rank, const uint64_t i_mrs ) { using TT = ccsTraits; fapi2::buffer rcd_boilerplate_arr0; fapi2::buffer rcd_boilerplate_arr1; fapi2::buffer mrs(i_mrs); // // Generic DDR4 MRS setup (RCD is an MRS) // mrs_rcd_helper(rcd_boilerplate_arr0); // // MRS setup // // DDR4: Set BG1 to 0. BG0, BA1:BA0 to i_mrs rcd_boilerplate_arr0.clearBit(); mss::swizzle(mrs, rcd_boilerplate_arr0); FAPI_DBG("mrs rcd boiler 0x%016lx 0x%llx", uint8_t(mrs), uint64_t(rcd_boilerplate_arr0)); return instruction_t(i_rank, rcd_boilerplate_arr0, rcd_boilerplate_arr1); } /// /// @brief Create, initialize a JEDEC Device Deselect CCS command /// @param[in] i_idle the idle time to the next command (default to 0) /// @return the Device Deselect CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t des_command(const uint16_t i_idle = 0) { using TT = ccsTraits; fapi2::buffer rcd_boilerplate_arr0; fapi2::buffer rcd_boilerplate_arr1; // ACT is high. It's a no-care in the spec but it seems to raise questions when // people look at the trace, so lets set it high. rcd_boilerplate_arr0.setBit(); // CKE is high Note: P8 set all 4 of these high - not sure if that's correct. BRS rcd_boilerplate_arr0.insertFromRight(CKE_HIGH); // Insert idle rcd_boilerplate_arr1.template insertFromRight( i_idle ); // ACT is high no-care // RAS, CAS, WE no-care // Device Deslect wants CS_n always high (select nothing using rank NO_CHIP_SELECT_ACTIVE) return instruction_t( NO_CHIP_SELECT_ACTIVE, rcd_boilerplate_arr0, rcd_boilerplate_arr1); } /// /// @brief Converts an ODT attribute to CCS array input /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_attr_value ODT attribute value /// @return CCS value for the ODT's /// inline uint8_t convert_odt_attr_to_ccs(const fapi2::buffer& i_attr_value) { using TT = ccsTraits; // ODT value buffer fapi2::buffer l_ccs_value; l_ccs_value.template writeBit(i_attr_value.template getBit()) .template writeBit(i_attr_value.template getBit()) .template writeBit(i_attr_value.template getBit()) .template writeBit(i_attr_value.template getBit()) .template writeBit(i_attr_value.template getBit()); return uint8_t(l_ccs_value); } /// /// @brief Create, initialize an ODT CCS command /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_odt_values CCS defined ODT values /// @param[in] i_cycles the number of cycles to hold the ODT for - defaults to DEFAULT_ODT_CYCLE_LEN /// @return the Device Deselect CCS instruction /// @note This technically is not a JEDEC command, but is needed for CCS to hold the ODT cycles /// CCS by design does not repeat or latch ODT's appropriately /// As such, it's up to the programmers to hold the ODT's appropriately /// This "command" will greatly help us do that /// template< typename TT = ccsTraits > inline instruction_t odt_command(const uint8_t i_odt_values, const uint64_t i_cycles = TT::DEFAULT_ODT_CYCLE_LEN) { auto l_odt_cmd = des_command(); l_odt_cmd.arr0.template insertFromRight(i_odt_values); l_odt_cmd.arr1.template insertFromRight(i_cycles); return l_odt_cmd; } /// /// @brief Create, initialize a NTTM read CCS command /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @return the Device Deselect CCS instruction /// @note need to setup 4 cycles delay /// inline instruction_t nttm_read_command() { using TT = ccsTraits; // get the des_command auto l_command = des_command(); // set to CCS_INST_ARR1 register l_command.arr1.template setBit(); l_command.arr1.template insertFromRight(TT::NTTM_READ_DELAY); return l_command; } /// /// @brief Create, initialize a JEDEC Device Power Down Entry CCS command /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @return the Device Deselect CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t pde_command() { using TT = ccsTraits; fapi2::buffer rcd_boilerplate_arr0; fapi2::buffer rcd_boilerplate_arr1; // Power Down Entry just like a DES, but we set CKE low instruction_t l_inst = des_command(); // CKE is low. Note: P8 set all 4 of these low - not sure if that's correct. l_inst.arr0.template insertFromRight(CKE_LOW); l_inst.arr1.template insertFromRight( TT::TIMING_TCPDED ); return l_inst; } /// /// @brief Setup ZQ Long instruction /// @param[in] i_rank the rank on this dimm /// @param[in] i_idle the idle time to the next command (default to 0) /// @return the MRS CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t zqcl_command( const uint64_t i_rank, const uint16_t i_idle = 0 ) { using TT = ccsTraits; fapi2::buffer l_boilerplate_arr0; fapi2::buffer l_boilerplate_arr1; // CKE is high Note: P8 set all 4 of these high - not sure if that's correct. BRS l_boilerplate_arr0.insertFromRight(CKE_HIGH); // ACT is high l_boilerplate_arr0.setBit(); // RAS/CAS high, WE low l_boilerplate_arr0.setBit() .template setBit() .template clearBit(); // ADDR10/AP is high l_boilerplate_arr0.setBit(); // Insert idle l_boilerplate_arr1.template insertFromRight( i_idle ); return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Setup read command helper function /// @param[in] i_rank the rank on this dimm /// @param[in] i_bank_addr bank address bits [BG0:BG1] = [62:63] (right aligned) /// @param[in] i_bank_group_addr bank group address bits [BA0:BA1] = [62:63] (right aligned) /// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned) /// @return the read command CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// static fapi2::buffer read_cmd_boilerplate( const uint64_t i_rank, const fapi2::buffer& i_bank_addr = 0, const fapi2::buffer& i_bank_group_addr = 0, const fapi2::buffer& i_column_addr = 0) { using TT = ccsTraits; // TODO - RTC 166175 Encapsulate command truth table in a subclass for ccs.H fapi2::buffer l_boilerplate_arr0; // CKE is high Note: P8 set all 4 of these high - not sure if that's correct. AAM l_boilerplate_arr0.insertFromRight(CKE_HIGH); // ACT is high l_boilerplate_arr0.setBit(); // RAS high, CAS low, WE high l_boilerplate_arr0.setBit() .template clearBit() .template setBit(); l_boilerplate_arr0.insertFromRight(i_bank_addr); // Bank Group takes a little effort - the bits aren't contiguous constexpr uint64_t BG0_BIT = 62; constexpr uint64_t BG1_BIT = 63; l_boilerplate_arr0.writeBit(i_bank_group_addr.getBit()) .template writeBit(i_bank_group_addr.getBit()); // CA is A[0:9] l_boilerplate_arr0.insertFromRight(i_column_addr); return l_boilerplate_arr0; } /// /// @brief Setup write command (Fixed BL8 or BC4) instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_rank the rank on this dimm /// @param[in] i_bank_addr bank address bits [BA0:BA1] = [62:63] (right aligned) /// @param[in] i_bank_group_addr bank group address bits [BG0:BG1] = [62:63] (right aligned) /// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned) /// @return the write command CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t wr_command( const uint64_t i_rank, const fapi2::buffer& i_bank_addr = 0, const fapi2::buffer& i_bank_group_addr = 0, const fapi2::buffer& i_column_addr = 0) { using TT = ccsTraits; // WR's and RD's are very similar, so we just use the RD command boiler plate and modify the command to a WR fapi2::buffer l_boilerplate_arr0 = read_cmd_boilerplate(i_rank, i_bank_addr, i_bank_group_addr, i_column_addr); fapi2::buffer l_boilerplate_arr1; // RAS high, CAS low, WE low l_boilerplate_arr0.setBit() .template clearBit() .template clearBit(); return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Setup read command (Fixed BL8 or BC4) instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_rank the rank on this dimm /// @param[in] i_bank_addr bank address bits [BA0:BA1] = [62:63] (right aligned) /// @param[in] i_bank_group_addr bank group address bits [BG0:BG1] = [62:63] (right aligned) /// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned) /// @return the read command CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t rd_command( const uint64_t i_rank, const fapi2::buffer& i_bank_addr = 0, const fapi2::buffer& i_bank_group_addr = 0, const fapi2::buffer& i_column_addr = 0) { using TT = ccsTraits; fapi2::buffer l_boilerplate_arr0; fapi2::buffer l_boilerplate_arr1; l_boilerplate_arr0 = read_cmd_boilerplate(i_rank, i_bank_addr, i_bank_group_addr, i_column_addr); // Setup ADDR10/AP based on read type l_boilerplate_arr0.clearBit(); return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Setup read w/auto precharge command (Fixed BL8 or BC4) instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_rank the rank on this dimm /// @param[in] i_bank_addr bank address bits [BG0:BG1] = [62:63] (right aligned) /// @param[in] i_bank_group_addr bank group address bits [BA0:BA1] = [62:63] (right aligned) /// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned) /// @return the read command CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t rda_command( const uint64_t i_rank, const fapi2::buffer& i_bank_addr = 0, const fapi2::buffer& i_bank_group_addr = 0, const fapi2::buffer& i_column_addr = 0) { using TT = ccsTraits; fapi2::buffer l_boilerplate_arr0; fapi2::buffer l_boilerplate_arr1; l_boilerplate_arr0 = read_cmd_boilerplate(i_rank, i_bank_addr, i_bank_group_addr, i_column_addr); // Setup ADDR10/AP based on read type l_boilerplate_arr0.setBit(); return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Setup precharge all banks command instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_rank the rank on this dimm /// @param[in] i_idle the idle time to the next command (default to 0) /// @return the precharge all banks command CCS instruction /// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this /// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included /// in this template definition) /// inline instruction_t precharge_all_command( const uint64_t i_rank, const uint16_t i_idle = 0 ) { using TT = ccsTraits; fapi2::buffer l_boilerplate_arr0; fapi2::buffer l_boilerplate_arr1; // CKE is high Note: P8 set all 4 of these high - not sure if that's correct. AAM l_boilerplate_arr0.insertFromRight(CKE_HIGH); // ACT is high l_boilerplate_arr0.setBit(); // RAS low, CAS high, WE low l_boilerplate_arr0.clearBit() .template setBit() .template clearBit(); // Setup ADDR10/AP high l_boilerplate_arr0.setBit(); // Insert idle l_boilerplate_arr1.template insertFromRight( i_idle ); // From DDR4 Spec table 17: // All other bits from the command truth table or 'V', for valid (1 or 0) return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Setup self-refresh entry command instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_rank the rank on this dimm /// @param[in] i_idle the idle time to the next command (default to 0) /// @return the self-refresh entry command CCS instruction /// @note THIS IS FOR DDR4 NON-LRDIMM ONLY RIGHT NOW /// inline instruction_t self_refresh_entry_command( const uint64_t i_rank, const uint16_t i_idle = 0 ) { using TT = ccsTraits; fapi2::buffer l_boilerplate_arr0; fapi2::buffer l_boilerplate_arr1; // Set all CKE to high except the rank passed in l_boilerplate_arr0.insertFromRight(CKE_ARY_SRE[i_rank]); // ACT is high l_boilerplate_arr0.setBit(); // RAS low, CAS low, WE high l_boilerplate_arr0.clearBit() .template clearBit() .template setBit(); // Insert idle l_boilerplate_arr1.template insertFromRight( i_idle ); // From DDR4 Spec table 17: // All other bits from the command truth table are 'V', for valid (1 or 0) return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Setup self-refresh exit using NOP command instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_rank the rank on this dimm /// @param[in] i_idle the idle time to the next command (default to 0) /// @return the self-refresh exit command CCS instruction /// @note Using NOP in case SDRAM is in gear down mode and max power saving mode exit /// @note THIS IS FOR DDR4 NON-LRDIMM ONLY RIGHT NOW /// inline instruction_t self_refresh_exit_command( const uint64_t i_rank, const uint16_t i_idle = 0 ) { using TT = ccsTraits; fapi2::buffer l_boilerplate_arr0; fapi2::buffer l_boilerplate_arr1; // Set all CKE to low except the rank passed in l_boilerplate_arr0.insertFromRight(CKE_ARY_SRX[i_rank]); // ACT is high l_boilerplate_arr0.setBit(); // RAS high, CAS high, WE high l_boilerplate_arr0.setBit() .template setBit() .template setBit(); // Insert idle l_boilerplate_arr1.template insertFromRight( i_idle ); // From DDR4 Spec table 17: // All other bits from the command truth table are 'V', for valid (1 or 0) return instruction_t(i_rank, l_boilerplate_arr0, l_boilerplate_arr1); } /// /// @brief Setup refresh command instruction /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the DIMM this instruction is headed for /// @param[in] i_rank the rank on this dimm /// @param[in] i_idle the idle time to the next command (default to 0) /// @return the self-refresh entry command CCS instruction /// @note THIS IS FOR DDR4 NON-LRDIMM ONLY RIGHT NOW /// inline instruction_t refresh_command( const uint64_t i_rank, const uint16_t i_idle = 0 ) { using TT = ccsTraits; // Refresh is self-refresh entry with CKE high auto l_refresh_template = self_refresh_entry_command(i_rank, i_idle); // CKE is high l_refresh_template.arr0.template insertFromRight(CKE_HIGH); return l_refresh_template; } // // These functions are a little sugar to keep callers from doing the traits-dance to get the // appropriate bit field // /// /// @brief Select the port(s) to be used by the CCS /// @tparam MC the memory controller type which executes the CCS instruction /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the target to effect /// @param[in] i_ports the buffer representing the ports /// template< mss::mc_type MC, fapi2::TargetType T, typename TT = ccsTraits > fapi2::ReturnCode select_ports( const fapi2::Target& i_target, uint64_t i_ports); /// /// @brief User sets to a '1'b to tell the Hdw to stop CCS whenever failure occurs. When a /// '0'b, Hdw will continue CCS even if a failure occurs. /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] the target to effect /// @param[in,out] io_buffer the buffer representing the mode register /// @param[in] i_value true iff stop whenever failure occurs. /// template< fapi2::TargetType T, typename TT = ccsTraits > inline void stop_on_err( const fapi2::Target&, fapi2::buffer& io_buffer, const states i_value) { io_buffer.writeBit(i_value); } /// /// @brief Disable ECC checking on the CCS arrays /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] the target to effect /// @param[in,out] io_buffer the buffer representing the mode register /// template< fapi2::TargetType T, typename TT = ccsTraits > inline void disable_ecc( const fapi2::Target&, fapi2::buffer& io_buffer) { io_buffer.setBit() .template setBit(); } /// /// @brief User sets to a '1'b to force the Hdw to ignore any array ue or sue errors /// during CCS command fetching. /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] the target to effect /// @param[in,out] io_buffer the buffer representing the mode register /// @param[in] i_value true iff ignore any array ue or sue errors. /// template< fapi2::TargetType T, typename TT = ccsTraits > inline void ue_disable( const fapi2::Target&, fapi2::buffer& io_buffer, const states i_value) { io_buffer.writeBit(i_value); } /// /// @brief User sets to a '1'b to force the Hdw to delay parity a cycle /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] the target to effect /// @param[in,out] io_buffer the buffer representing the mode register /// @param[in] i_value mss::ON iff delay parity a cycle /// template< fapi2::TargetType T, typename TT = ccsTraits > inline void parity_after_cmd( const fapi2::Target&, fapi2::buffer& io_buffer, const states i_value) { io_buffer.writeBit(i_value); } /// /// @brief DDr calibration counter /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] the target to effect /// @param[in,out] io_buffer the buffer representing the mode register /// @param[in] i_count the count to wait for DDR cal to complete. /// @param[in] i_mult the DDR calibration time multiplaction factor /// template< fapi2::TargetType T, typename TT = ccsTraits > inline void cal_count( const fapi2::Target&, fapi2::buffer& io_buffer, const uint64_t i_count, const uint64_t i_mult) { io_buffer.insertFromRight(i_count); io_buffer.insertFromRight(i_mult); } /// /// @brief Copy CKE signals to CKE Spare on both ports NOTE: DOESN'T APPLY FOR NIMBUS. NO /// SPARE CHIPS TO COPY TO. 0 - Spare CKEs not copied with values from CKE(0:1) and /// CKE(4:5) 1 - Port A CKE(0:1) copied to Port A CKE(2:3), Port A CKE(4:5) copied /// to Port A CKE(6:7), Port B CKE(0:1) copied to Port B CKE(2:3) and Port B CKE(4:5) /// copied to Port B CKE(6:7) /// @tparam T the fapi2::TargetType - derived /// @tparam TT the ccsTraits associated with T - derived /// @param[in] i_target the target to effect /// @param[in,out] io_buffer the buffer representing the mode register /// @param[in] i_value mss::ON iff Copy CKE signals to CKE Spare on both ports /// @note no-op for p9n /// template< fapi2::TargetType T, typename TT = ccsTraits > void copy_cke_to_spare_cke( const fapi2::Target&, fapi2::buffer& io_buffer, const states i_value); /// /// @brief Read the modeq register appropriate for this target /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the target to effect /// @param[in,out] io_buffer the buffer representing the mode register /// @return FAPI2_RC_SUCCSS iff ok /// template< fapi2::TargetType T, typename TT = ccsTraits > inline fapi2::ReturnCode read_mode( const fapi2::Target& i_target, fapi2::buffer& io_buffer) { return mss::getScom(i_target, TT::MODEQ_REG, io_buffer); } /// /// @brief Write the modeq register appropriate for this target /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the target to effect /// @param[in] i_buffer the buffer representing the mode register /// @return FAPI2_RC_SUCCSS iff ok /// template< fapi2::TargetType T, typename TT = ccsTraits > inline fapi2::ReturnCode write_mode( const fapi2::Target& i_target, const fapi2::buffer& i_buffer) { return mss::putScom(i_target, TT::MODEQ_REG, i_buffer); } /// /// @brief config the NTTM /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_mcbist the target to operate /// @param[in] i_nttm_mode NTTM we need to turn on or off (i.e. ON, OFF) /// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok /// template< fapi2::TargetType T, typename TT = ccsTraits > inline fapi2::ReturnCode configure_nttm( const fapi2::Target& i_target, const mss::states i_nttm_mode) { fapi2::buffer l_data; FAPI_TRY(read_mode(i_target, l_data)); l_data.writeBit(i_nttm_mode); FAPI_TRY(write_mode(i_target, l_data)); fapi_try_exit: return fapi2::current_err; } /// /// @brief Execute a set of CCS instructions - multiple ports /// @tparam P the port type for this CCS engine /// @tparam MC the MC type on which to operate /// @param[in] i_program the vector of instructions /// @param[in] i_ports the vector of ports /// @return FAPI2_RC_SUCCSS iff ok /// template< fapi2::TargetType P, mss::mc_type MC> fapi2::ReturnCode cleanup_from_execute(const ccs::program& i_program, const std::vector< fapi2::Target

>& i_ports); /// /// @brief Start or stop the CCS engine /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target The MCBIST containing the CCS engine /// @param[in] i_start_stop bool MSS_CCS_START for starting MSS_CCS_STOP otherwise /// @return FAPI2_RC_SUCCESS iff success /// template< fapi2::TargetType T, typename TT = ccsTraits > fapi2::ReturnCode start_stop( const fapi2::Target& i_target, const bool i_start_stop ) { fapi2::buffer l_buf; // Do we need to read this? We are setting the only bit defined in the scomdef? BRS FAPI_TRY(mss::getScom(i_target, TT::CNTLQ_REG, l_buf)); FAPI_TRY( mss::putScom(i_target, TT::CNTLQ_REG, i_start_stop ? l_buf.setBit() : l_buf.setBit()) ); fapi_try_exit: return fapi2::current_err; } /// /// @brief Determine the CCS failure type /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam P the target of the CCS instruction (the port) /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target MC target /// @param[in] i_type the failure type /// @param[in] i_port The port the CCS instruction is training /// @return ReturnCode associated with the fail. /// @note FFDC is handled here, caller doesn't need to do it /// template< fapi2::TargetType T = DEFAULT_MC_TARGET, fapi2::TargetType P = DEFAULT_MEM_PORT_TARGET, typename TT = ccsTraits > fapi2::ReturnCode fail_type( const fapi2::Target& i_target, const uint64_t i_type, const fapi2::Target

& i_port ); /// /// @brief Execute a CCS array already loaded in to the engine /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam P the target of the CCS instruction (the port) /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the target to effect /// @param[in] i_program the MCBIST ccs program - to get the polling parameters /// @param[in] i_port the port associated with the MCBIST array /// @return FAPI2_RC_SUCCSS iff ok /// template< fapi2::TargetType T, fapi2::TargetType P, typename TT = ccsTraits > fapi2::ReturnCode execute_inst_array(const fapi2::Target& i_target, ccs::program& i_program, const fapi2::Target

& i_port) { fapi2::buffer status; FAPI_TRY(start_stop(i_target, mss::START), "%s Error in execute_inst_array", mss::c_str(i_port) ); mss::poll(i_target, TT::STATQ_REG, i_program.iv_poll, [&status](const size_t poll_remaining, const fapi2::buffer& stat_reg) -> bool { FAPI_DBG("ccs statq 0x%016lx, remaining: %d", stat_reg, poll_remaining); status = stat_reg; return status.getBit() != 1; }, i_program.iv_probes); // Check for done and success. DONE being the only bit set. if (status == TT::STAT_QUERY_SUCCESS) { FAPI_INF("%s CCS Executed Successfully.", mss::c_str(i_port) ); goto fapi_try_exit; } // So we failed or we're still in progress. Mask off the fail bits // and run this through the FFDC generator. FAPI_TRY(fail_type(i_target, status & TT::STAT_ERR_MASK, i_port), "Error in execute_inst_array" ); fapi_try_exit: return fapi2::current_err; } /// /// @brief Updates the initial delays based upon the total delays passed in /// @tparam fapi2::TargetType T the type of the target running CCS /// @tparam MC the memory controller type running CCS /// @param[in] i_target the target type on which to operate /// @param[in] i_delay the calculated delays from CCS /// @param[in,out] io_program the program for which to update the delays /// @return FAPI2_RC_SUCCSS iff ok /// template< fapi2::TargetType T, mss::mc_type MC = DEFAULT_MC_TYPE > fapi2::ReturnCode update_initial_delays( const fapi2::Target& i_target, const uint64_t i_delay, ccs::program& io_program); /// /// @brief Execute a set of CCS instructions - multiple ports /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam P the port type for this CCS engine /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the target to effect /// @param[in] i_program the vector of instructions /// @param[in] i_ports the vector of ports /// @return FAPI2_RC_SUCCSS iff ok /// template< fapi2::TargetType T, fapi2::TargetType P, typename TT = ccsTraits > fapi2::ReturnCode execute( const fapi2::Target& i_target, ccs::program& i_program, const std::vector< fapi2::Target

>& i_ports) { // Subtract one for the idle we insert at the end constexpr size_t CCS_INSTRUCTION_DEPTH = TT::CCS_ARRAY_LEN - 1; constexpr uint64_t CCS_ARR0_ZERO = TT::CCS_ARR0_START; constexpr uint64_t CCS_ARR1_ZERO = TT::CCS_ARR1_START; ccs::instruction_t l_des = ccs::des_command(); FAPI_INF("loading ccs instructions (%d) for %s", i_program.iv_instructions.size(), mss::c_str(i_target)); auto l_inst_iter = i_program.iv_instructions.begin(); std::vector l_rank_configs; FAPI_TRY(get_rank_config(i_target, l_rank_configs)); // Stop the CCS engine just for giggles - it might be running ... FAPI_TRY( start_stop(i_target, mss::states::STOP), "Error in ccs::execute" ); FAPI_ASSERT( mss::poll(i_target, TT::STATQ_REG, poll_parameters(), [](const size_t poll_remaining, const fapi2::buffer& stat_reg) -> bool { FAPI_INF("ccs statq (stop) 0x%llx, remaining: %d", stat_reg, poll_remaining); return stat_reg.getBit() != 1; }), TT::setup_trying_to_stop_err(i_target) ); while (l_inst_iter != i_program.iv_instructions.end()) { // Kick off the CCS engine - per port. No broadcast mode for CCS (per Shelton 9/23/15) for (const auto& p : i_ports) { const auto l_port_index = mss::relative_pos(p); size_t l_inst_count = 0; uint64_t l_total_delay = 0; uint64_t l_delay = 0; uint64_t l_repeat = 0; uint8_t l_current_cke = 0; // Shove the instructions into the CCS engine, in 32 instruction chunks, and execute them for (; l_inst_iter != i_program.iv_instructions.end() && l_inst_count < CCS_INSTRUCTION_DEPTH; ++l_inst_count, ++l_inst_iter) { // First, update the current instruction's chip selects for the current port FAPI_TRY(l_inst_iter->configure_rank(p, l_rank_configs[l_port_index]), "Error in rank config"); l_inst_iter->arr0.extractToRight(l_current_cke); // Make sure this instruction leads to the next. Notice this limits this mechanism to pretty // simple (straight line) CCS programs. Anything with a loop or such will need another mechanism. l_inst_iter->arr1.insertFromRight(l_inst_count + 1); FAPI_TRY( mss::putScom(i_target, CCS_ARR0_ZERO + l_inst_count, l_inst_iter->arr0), "Error in ccs::execute" ); FAPI_TRY( mss::putScom(i_target, CCS_ARR1_ZERO + l_inst_count, l_inst_iter->arr1), "Error in ccs::execute" ); // arr1 contains a specification of the delay and repeat after this instruction, as well // as a repeat. Total up the delays as we go so we know how long to wait before polling // the CCS engine for completion l_inst_iter->arr1.extractToRight(l_delay); l_inst_iter->arr1.extractToRight(l_repeat); l_total_delay += l_delay * (l_repeat + 1); FAPI_INF("css inst %d: 0x%016lX 0x%016lX (0x%lx, 0x%lx) delay: 0x%x (0x%x) %s", l_inst_count, l_inst_iter->arr0, l_inst_iter->arr1, CCS_ARR0_ZERO + l_inst_count, CCS_ARR1_ZERO + l_inst_count, l_delay, l_total_delay, mss::c_str(i_target)); } // Updates the initial delays FAPI_TRY(update_initial_delays(i_target, l_total_delay, i_program), "Error in ccs::execute"); FAPI_INF("executing ccs instructions (%d:%d, %d) for %s", i_program.iv_instructions.size(), l_inst_count, i_program.iv_poll.iv_initial_delay, mss::c_str(i_target)); // Deselect l_des.arr0.insertFromRight(l_current_cke); // Insert a DES as our last instruction. DES is idle state anyway and having this // here as an instruction forces the CCS engine to wait the delay specified in // the last instruction in this array (which it otherwise doesn't do.) l_des.arr1.setBit(); FAPI_TRY( mss::putScom(i_target, CCS_ARR0_ZERO + l_inst_count, l_des.arr0), "Error in ccs::execute" ); FAPI_TRY( mss::putScom(i_target, CCS_ARR1_ZERO + l_inst_count, l_des.arr1), "Error in ccs::execute" ); FAPI_INF("css inst %d fixup: 0x%016lX 0x%016lX (0x%lx, 0x%lx) %s", l_inst_count, l_des.arr0, l_des.arr1, CCS_ARR0_ZERO + l_inst_count, CCS_ARR1_ZERO + l_inst_count, mss::c_str(i_target)); FAPI_INF("executing CCS array for port %d (%s)", l_port_index, mss::c_str(p)); FAPI_TRY( select_ports( i_target, l_port_index), "Error in ccs execute" ); FAPI_TRY( execute_inst_array(i_target, i_program, p), "Error in ccs execute" ); } } // Cleans up after executing the CCS program (runs workarounds if needed) FAPI_TRY((cleanup_from_execute(i_program, i_ports))); fapi_try_exit: i_program.iv_instructions.clear(); return fapi2::current_err; } /// /// @brief Execute a set of CCS instructions - single port /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam P the target of the CCS instruction (the port) /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target the target to effect /// @param[in] i_program the vector of instructions /// @param[in] i_port The target that's being programmed by the array /// @return FAPI2_RC_SUCCSS iff ok /// template< fapi2::TargetType T, fapi2::TargetType P, typename TT = ccsTraits > fapi2::ReturnCode execute( const fapi2::Target& i_target, ccs::program& i_program, const fapi2::Target

& i_port) { // Mmm. Might want to find a better way to do this - seems expensive. BRS std::vector< fapi2::Target

> l_ports{ i_port }; return execute(i_target, i_program, l_ports); } /// /// @brief Query the status of the CCS engine /// @tparam T the target type of the chiplet which executes the CCS instruction /// @tparam TT the CCS traits of the chiplet which executes the CCS instruction /// @param[in] i_target The MCBIST containing the CCS engine /// @param[out] io_status The query result first being the result, second the type /// @return FAPI2_RC_SUCCESS iff success /// template< fapi2::TargetType T, typename TT = ccsTraits > fapi2::ReturnCode status_query( const fapi2::Target& i_target, std::pair& io_status ); } // ends namespace ccs } // ends namespace mss #endif