diff options
Diffstat (limited to 'src/usr/mmio/mmio_explorer.C')
-rw-r--r-- | src/usr/mmio/mmio_explorer.C | 474 |
1 files changed, 474 insertions, 0 deletions
diff --git a/src/usr/mmio/mmio_explorer.C b/src/usr/mmio/mmio_explorer.C new file mode 100644 index 000000000..9565c8341 --- /dev/null +++ b/src/usr/mmio/mmio_explorer.C @@ -0,0 +1,474 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/mmio/mmio_explorer.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +#include <devicefw/driverif.H> +#include <errl/errlentry.H> +#include <errl/errlmanager.H> +#include <errl/errludtarget.H> +#include <errl/errludlogregister.H> +#include <explorer_scom_addresses.H> +#include <lib/inband/exp_inband.H> +#include <mmio/mmio_reasoncodes.H> + +// Trace definition +extern trace_desc_t* g_trac_mmio; + +using namespace TARGETING; + +namespace MMIOEXP +{ + +#define MMIOEXP_SCOM2OFFSET(_SCOM_ADDR) \ + (mss::exp::ib::EXPLR_IB_MMIO_OFFSET | (_SCOM_ADDR << 3)) + +/** + * @brief Possible Open CAPI response codes for config operations + */ +enum +{ + OCAPI_RETRY_REQUEST = 0x2, + OCAPI_DATA_ERROR = 0x8, + OCAPI_UNSUPPORTED_OP_LENGTH = 0x9, + OCAPI_BAD_ADDRESS = 0xB, + OCAPI_FAILED = 0xE, +}; + +/** + * @brief Possible PCB error codes for non-config operations + */ +enum +{ + PCB_OK = 0x0, + PCB_INVALID_ADDRESS = 0x4, + PCB_PARITY_ERROR = 0x6, + PCB_TIMEOUT = 0x7, +}; + +/** + * @brief bit-field definitions for MCFGERR register + */ +typedef union mcfgerrReg +{ + struct + { + uint64_t reserved :16; + uint64_t resp_code :4; + uint64_t bdi :1; + uint64_t error_type :3; + uint64_t device :5; + uint64_t function :3; + uint64_t dev_func_mismatch :1; + uint64_t detect_bad_op :1; + uint64_t tbit_is_1 :1; + uint64_t data_is_bad :1; + uint64_t pl_is_invalid :1; + uint64_t bad_op_or_align :1; + uint64_t addr_no_implemented:1; + uint64_t rdata_vld :1; + uint64_t tbit :1; + uint64_t plen :3; + uint64_t portnun :2; + uint64_t dl :2; + uint64_t capptag :16; + }; + uint64_t word64; +}mcfgerrReg_t; + +/** + * @brief bit-field definitions for GIF2PCB_ERROR register + */ +typedef union gif2pcbErrorReg +{ + struct + { + uint64_t parity_error_rsp_info :1; + uint64_t parity_error_rsp_data_0 :1; + uint64_t parity_error_rsp_data_1 :1; + uint64_t parity_error_rsp_data_2 :1; + uint64_t parity_error_rsp_data_3 :1; + uint64_t timeout_error :1; + uint64_t int_addr_access_error :1; + uint64_t invalid_access :1; + uint64_t pcb_err_code :3; + uint64_t axi_read_addr_parity_error :1; + uint64_t axi_write_addr_parity_error :1; + uint64_t axi_write_data_parity_error_31_24 :1; + uint64_t axi_write_data_parity_error_23_16 :1; + uint64_t axi_write_data_parity_error_15_8 :1; + uint64_t axi_write_data_parity_error_7_0 :1; + uint64_t pib2gif_parity_error :1; + uint64_t reserved :46; + }; + struct + { + uint64_t used_bits :18; + uint64_t unused_bits :46; + }; + uint64_t word64; +}gif2pcbErrorReg_t; + +/** + * @brief bit-field definitions for PIB2GIF_ERROR register + */ +typedef union pib2gifErrorReg +{ + struct + { + uint64_t parity_error_req_data_0:1; + uint64_t parity_error_req_data_1:1; + uint64_t parity_error_req_data_2:1; + uint64_t parity_error_req_data_3:1; + uint64_t parity_error_req_addr_0:1; + uint64_t parity_error_req_addr_1:1; + uint64_t parity_error_req_ctrl:1; + uint64_t timeout_error:1; + uint64_t int_addr_access_error:1; + uint64_t parity_error_on_fsm:1; + uint64_t parity_error_on_reg0:1; + uint64_t parity_error_on_reg1:1; + uint64_t parity_error_on_reg2:1; + uint64_t parity_error_on_reg3:1; + uint64_t parity_error_on_reg4:1; + uint64_t parity_error_on_reg5:1; + uint64_t invalid_address_error:1; + uint64_t reserved1:15; + uint64_t gif2pcb_error:18; + uint64_t reserved2:14; + }; + uint64_t word64; +}pib2gifErrorReg_t; + +// Explorer MMIO addresses only have 35 bits +constexpr uint64_t MASK_35BITS = 0x7FFFFFFFFull; + + +/******************************************************************************* + * + * See header file for comments + */ +errlHndl_t checkExpError(const TargetHandle_t i_expTarget, + const uint64_t i_va, + const uint64_t i_accessLimit, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + bool& o_errorAddressMatches, + bool& o_errorAddressIsZero) +{ + errlHndl_t l_err = nullptr; + uint64_t l_errAddr = 0; + bool l_errorAddressMatches = false; + bool l_errorAddressIsZero = false; + const char* l_regStr = nullptr; + + // NOTE: mmio_memcpy could be doing multiple transactions. This means + // we need to test the explorer error address register against a + // range of values instead of a single value. + // NOTE: Explorer only uses the low 35 bits of the address for MMIO access + const uint64_t l_mmioAddr35Lo = i_va & MASK_35BITS; + const uint64_t l_mmioAddr35Hi = (i_va + i_accessLimit) & MASK_35BITS; + + do + { + // For access to CONFIG space, the MCFGERRA scom register + // contains the first failing address. + if(i_offset < mss::exp::ib::EXPLR_IB_MMIO_OFFSET) + { + auto l_reqSize = sizeof(l_errAddr); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_errAddr, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERRA)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkExpError: getscom(MCFGERRA) failed." + " huid[0x%08x]", get_huid(i_expTarget)); + break; + } + l_regStr = "MCFGERRA"; + } + // Otherwise, we are accessing a non-config address and, if there + // is a failure, the MMIO address will show up in the lower 35 bits of + // the MMIOERR register + else + { + // If the transaction was a read to this error register then + // we already know that it failed. Don't keep trying to + // read it or we could end up in a recursive loop. + if((i_opType == DeviceFW::READ) && + (i_offset == MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MMIOERR))) + { + break; + } + auto l_reqSize = sizeof(l_errAddr); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_errAddr, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MMIOERR)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkExpError: getscom(MMIOERR) failed." + " huid[0x%08x]", get_huid(i_expTarget)); + break; + } + l_regStr = "MMIOERR"; + } + + // Check if error address from explorer is zero, meaning that + // explorer did not detect an error. + if(l_errAddr == 0) + { + l_errorAddressIsZero = true; + } + + // Check if 35-bit error address is outside our transaction + // access range + const uint64_t l_errAddr35 = l_errAddr & MASK_35BITS; + if((l_errAddr35 < l_mmioAddr35Lo) || + (l_errAddr35 >= l_mmioAddr35Hi)) + { + TRACDCOMP(g_trac_mmio, + "checkExpError: %s: 0x%09llx is not between 0x%09llx and" + " 0x%09llx on huid[0x%08x]", + l_regStr, l_errAddr, l_mmioAddr35Lo, + l_mmioAddr35Hi, get_huid(i_expTarget)); + // Error address is outside our transaction range so this error + // was not caused by our transaction. + break; + } + + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkExpError: %s: 0x%09llx is between 0x%09llx and" + " 0x%09llx on huid[0x%08x]", + l_regStr, l_errAddr35, l_mmioAddr35Lo, + l_mmioAddr35Hi, get_huid(i_expTarget)); + l_errorAddressMatches = true; + + // NOTE: These registers cannot be cleared without resetting the chip. + + }while(0); + + o_errorAddressMatches = l_errorAddressMatches; + o_errorAddressIsZero = l_errorAddressIsZero; + return l_err; +} + +/******************************************************************************* + * + * See header file for comments + */ +errlHndl_t determineExpCallouts(const TargetHandle_t i_expTarget, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + errlHndl_t i_err, + bool& o_fwFailure) +{ + bool l_fwFailure = false; //default to a hw failure + errlHndl_t l_err = nullptr; + size_t l_reqSize = 0; + ERRORLOG::ErrlUserDetailsLogRegister l_regDump(i_expTarget); + + do + { + // If the transaction was a read to any of these error registers, + // that we're about to read then we know that it already failed. + // Don't keep trying to read it or we could end up in a recursive + // loop. + if(i_opType == DeviceFW::READ) + { + switch(i_offset) + { + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MCFGERR): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MCFGERRA): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MMIOERR): + case MMIOEXP_SCOM2OFFSET(EXPLR_TP_MB_UNIT_TOP_PIB2GIF_ERROR_REG): + case MMIOEXP_SCOM2OFFSET(EXPLR_TP_MB_UNIT_TOP_GIF2PCB_ERROR_REG): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MFIR): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MFIRWOF): + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: recursive loop detected:" + " OCMB[0x%08x] offset[0x%016llx]", + TARGETING::get_huid(i_expTarget), i_offset); + /*@ + * @errortype + * @moduleid MMIO::MOD_DETERMINE_EXP_CALLOUTS + * @reasoncode MMIO::RC_BAD_MMIO_READ + * @userdata1 OCMB huid + * @userdata2 Address offset + * @devdesc OCMB MMIO read failed + * @custdesc Unexpected memory subsystem firmware + * error. + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + MMIO::MOD_DETERMINE_EXP_CALLOUTS, + MMIO::RC_BAD_MMIO_READ, + TARGETING::get_huid(i_expTarget), + i_offset, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT); + break; + + default: + break; + } + if(l_err) + { + break; + } + } + + // Check if this is an access to config space + if(i_offset < mss::exp::ib::EXPLR_IB_MMIO_OFFSET) + { + mcfgerrReg_t l_reg; + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: getting callouts for failed config" + " space transaction on OCMB[0x%08x]", get_huid(i_expTarget)); + + // Read the Explorer MCFGERR register + // NOTE: This register is not clearable + l_reqSize = sizeof(l_reg.word64); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_reg.word64, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERR)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "determineExpCallouts: getscom(MCFGERR) failed" + " on OCMB[0x%08x]", get_huid(i_expTarget)); + break; + } + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: MCFGERR: 0x%016llx on" + " OCMB[0x%08x]", l_reg.word64, get_huid(i_expTarget)); + + // Extract the OCAPI response code from the register + switch(l_reg.resp_code) + { + // Firmware Errors + case OCAPI_UNSUPPORTED_OP_LENGTH: + case OCAPI_BAD_ADDRESS: + l_fwFailure = true; + break; + + // This one could be caused by a bad address (FW) if there is + // a device/function mismatch. Otherwise, it's bad HW. + case OCAPI_FAILED: + if(l_reg.dev_func_mismatch) + { + l_fwFailure = true; + break; + } + break; + + // Everything else is HW failure + default: + break; + } + + // Dump some regs specific to config failures + l_regDump.addDataBuffer(&l_reg.word64, sizeof(l_reg.word64), + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERR)); + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERRA)); + break; + } + + // We were accessing a SCOM reg, MSCC reg, or SRAM + + pib2gifErrorReg_t l_pib2gif; + gif2pcbErrorReg_t l_gif2pcb; + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: getting callouts for failed MMIO space" + " transaction on OCMB[0x%08x]", get_huid(i_expTarget)); + + // Read the PIB2GIF error reg + // NOTE: This register is ONLY accessible through MMIO path, not I2C. + l_reqSize = sizeof(l_pib2gif.word64); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_pib2gif.word64, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_TP_MB_UNIT_TOP_PIB2GIF_ERROR_REG)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "determineExpCallouts: getscom(PIB2GIF_ERROR_REG) failed" + " on OCMB[0x%08x]", get_huid(i_expTarget)); + break; + } + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: PIB2GIF_ERROR_REG: 0x%016llx" + " on OCMB[0x%08x]", l_pib2gif.word64, get_huid(i_expTarget)); + + // The pib2gif error register contains a copy of the gif2pcb error reg. + // No need to read it again, just copy it into our struct. + l_gif2pcb.word64 = 0; + l_gif2pcb.used_bits = l_pib2gif.gif2pcb_error; + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: GIF2PCB_ERROR_REG: 0x%016llx" + " on OCMB[0x%08x]", l_gif2pcb.word64, get_huid(i_expTarget)); + + // Check for software errors + if((l_pib2gif.invalid_address_error) || + (l_gif2pcb.invalid_access) || + (l_gif2pcb.pcb_err_code == PCB_INVALID_ADDRESS)) + { + l_fwFailure = true; + } + + // dump some regs specific to MMIO failures + l_regDump.addDataBuffer(&l_pib2gif.word64, sizeof(l_pib2gif.word64), + DEVICE_SCOM_ADDRESS(EXPLR_TP_MB_UNIT_TOP_PIB2GIF_ERROR_REG)); + l_regDump.addData( + DEVICE_SCOM_ADDRESS(EXPLR_TP_MB_UNIT_TOP_GIF2PCB_ERROR_REG)); + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MMIOERR)); + break; + }while(0); + + if(!l_err) + { + // Dump some registers common to both types of transaction types + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MFIR)); + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MFIRWOF)); + + // Add our register dump to the error log. + l_regDump.addToLog(i_err); + } + + // Notify caller of HW or FW failure + o_fwFailure = l_fwFailure; + return l_err; +} + +}; // End MMIOEXP namespace |