diff options
-rw-r--r-- | src/include/usr/mmio/mmio_reasoncodes.H | 7 | ||||
-rw-r--r-- | src/makefile | 1 | ||||
-rw-r--r-- | src/usr/mmio/makefile | 11 | ||||
-rw-r--r-- | src/usr/mmio/mmio.C | 1157 | ||||
-rw-r--r-- | src/usr/mmio/mmio_explorer.C | 474 | ||||
-rw-r--r-- | src/usr/mmio/mmio_explorer.H | 90 | ||||
-rw-r--r-- | src/usr/mmio/test/makefile | 6 | ||||
-rw-r--r-- | src/usr/mmio/test/mmiotest.H | 64 |
8 files changed, 1460 insertions, 350 deletions
diff --git a/src/include/usr/mmio/mmio_reasoncodes.H b/src/include/usr/mmio/mmio_reasoncodes.H index 86ff60b5b..6a96547ac 100644 --- a/src/include/usr/mmio/mmio_reasoncodes.H +++ b/src/include/usr/mmio/mmio_reasoncodes.H @@ -36,6 +36,11 @@ namespace MMIO MOD_MMIO_PERFORM_OP = 0x02, MOD_MMIO_GET_PROC_SCOM = 0x03, MOD_MMIO_SET_PROC_SCOM = 0x04, + MOD_VALIDATE_OCMB_MMIO_OP = 0x05, + MOD_MMIO_CHAN_CHECKSTOP = 0x06, + MOD_CHECK_OCMB_ERROR = 0x07, + MOD_DETERMINE_CALLOUTS = 0x08, + MOD_DETERMINE_EXP_CALLOUTS = 0x09, }; enum MMIOReasonCode @@ -52,6 +57,8 @@ namespace MMIO RC_BAD_MMIO_WRITE = MMIO_COMP_ID | 0x09, RC_PROC_NOT_FOUND = MMIO_COMP_ID | 0x0A, RC_BAR_OFFSET_MISMATCH = MMIO_COMP_ID | 0x0B, + RC_MMIO_CHAN_CHECKSTOP = MMIO_COMP_ID | 0x0C, + RC_UNSUPPORTED_CHIPID = MMIO_COMP_ID | 0x0D, }; }; diff --git a/src/makefile b/src/makefile index 48f4b30be..fb09a08bc 100644 --- a/src/makefile +++ b/src/makefile @@ -266,6 +266,7 @@ TESTCASE_MODULES += $(if $(CONFIG_EARLY_TESTCASES) && $(FSP_BUILD) ,,testnvram) TESTCASE_MODULES += $(if $(CONFIG_AXONE_BRING_UP),,testsmf) TESTCASE_MODULES += testexpaccess TESTCASE_MODULES += testexpupd +TESTCASE_MODULES += testmmio #****************************************************************** #KNOWN ISSUES (I might let these run but there is something wrong) diff --git a/src/usr/mmio/makefile b/src/usr/mmio/makefile index cea686f85..0787dbe50 100644 --- a/src/usr/mmio/makefile +++ b/src/usr/mmio/makefile @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2013,2018 +# Contributors Listed Below - COPYRIGHT 2013,2019 # [+] International Business Machines Corp. # # @@ -25,12 +25,21 @@ ROOTPATH = ../../.. 
MODULE = mmio +SUBDIRS += test.d + EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/common/include/ +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/ffdc/ +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/common/ +EXTRAINCDIR += ${ROOTPATH}/src/import/ +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/common/utils/imageProcs/ +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/ocmb/explorer/common/include/ +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/ocmb/explorer/procedures/hwp/memory/ EXTRAINCDIR += ${ROOTPATH}/src/import/hwpf/fapi2/include/ EXTRAINCDIR += ${ROOTPATH}/src/include/usr/fapi2/ #include unique object modules OBJS += mmio.o +OBJS += mmio_explorer.o VPATH += .. include $(ROOTPATH)/config.mk diff --git a/src/usr/mmio/mmio.C b/src/usr/mmio/mmio.C index f6fc42b3d..fffdacbc6 100644 --- a/src/usr/mmio/mmio.C +++ b/src/usr/mmio/mmio.C @@ -27,6 +27,7 @@ #include <errl/errlentry.H> #include <errl/errlmanager.H> #include <errl/errludtarget.H> +#include <errl/errludlogregister.H> #include <targeting/common/predicates/predicates.H> #include <targeting/common/utilFilter.H> #include <targeting/common/targetservice.H> @@ -41,30 +42,48 @@ #include <p9a_mc_scom_addresses_fld.H> #include <error_info_defs.H> +#include "mmio_explorer.H" +#include <utils/chipids.H> + // Trace definition trace_desc_t* g_trac_mmio = NULL; TRAC_INIT(&g_trac_mmio, MMIO_COMP_NAME, 2*KILOBYTE, TRACE::BUFFER_SLOW); #define OMI_PER_MC 8 +using namespace TARGETING; + namespace MMIO { +// TODO RTC 201493 - Remove these consts once HW group has defined them. 
+static const uint8_t P9A_MC_DSTLFIR_SUBCHANNEL_A_FAIL_ACTION = 20; +static const uint8_t P9A_MC_DSTLFIR_SUBCHANNEL_B_FAIL_ACTION = 21; + // Helper function declarations (definitions at the bottom of this file) static -TARGETING::TargetHandle_t getParentProc(TARGETING::TargetHandle_t i_target); +TargetHandle_t getParentProc(TargetHandle_t i_ocmbTarget); static -errlHndl_t getProcScom(TARGETING::TargetHandle_t i_target, +errlHndl_t getProcScom(TargetHandle_t i_ocmbTarget, uint64_t i_scomAddr, uint64_t &o_scomData); -static -errlHndl_t setProcScom(TARGETING::TargetHandle_t i_target, + +// NOTE: removed static qualifier to prevent compiler from complaining about +// the function not being used. +errlHndl_t setProcScom(TargetHandle_t i_ocmbTarget, uint64_t i_scomAddr, uint64_t i_scomData); static void *mmio_memcpy(void *vdest, const void *vsrc, size_t len); +/******************************************************************************* + * + * @brief Setup the MMIO BAR registers for all OCMB chips in the system + * + * @return nullptr on success, failure otherwise. 
+ * + */ errlHndl_t mmioSetup() { errlHndl_t l_err = nullptr; @@ -77,17 +96,17 @@ errlHndl_t mmioSetup() // // loop through all the Memory Channels (MC Targets) // call allocate of 32 GB virtual memory space with mmio_dev_map() for each MC - TARGETING::TargetHandleList l_mcTargetList; - getAllChiplets(l_mcTargetList, TARGETING::TYPE_MC); + TargetHandleList l_mcTargetList; + getAllChiplets(l_mcTargetList, TYPE_MC); for (auto & l_mcTarget: l_mcTargetList) { uint32_t l_mcChipUnit = - l_mcTarget->getAttr<TARGETING::ATTR_CHIP_UNIT>(); + l_mcTarget->getAttr<ATTR_CHIP_UNIT>(); // Get the base BAR address for OpenCapi Memory Interfaces (OMIs) of this Memory Controller (MC) auto l_omiBaseAddr = - l_mcTarget->getAttr<TARGETING::ATTR_OMI_INBAND_BAR_BASE_ADDR_OFFSET>(); + l_mcTarget->getAttr<ATTR_OMI_INBAND_BAR_BASE_ADDR_OFFSET>(); // Apply the MMIO base offset so we get the real address uint64_t l_realAddr = ( l_omiBaseAddr | MMIO_BASE ); @@ -98,18 +117,18 @@ errlHndl_t mmioSetup() THIRTYTWO_GB)); TRACFCOMP ( g_trac_mmio, "MC%.02X (0x%.08X) MMIO BAR PHYSICAL ADDR = 0x%lX VIRTUAL ADDR = 0x%lX" , - l_mcChipUnit ? 0x23 : 0x01, TARGETING::get_huid(l_mcTarget), + l_mcChipUnit ? 
0x23 : 0x01, get_huid(l_mcTarget), l_realAddr, l_virtAddr); // set VM_ADDR on each OCMB - TARGETING::TargetHandleList l_omiTargetList; - getChildChiplets(l_omiTargetList, l_mcTarget, TARGETING::TYPE_OMI); + TargetHandleList l_omiTargetList; + getChildChiplets(l_omiTargetList, l_mcTarget, TYPE_OMI); for (auto & l_omiTarget: l_omiTargetList) { // ATTR_CHIP_UNIT is relative to other OMI under this PROC uint32_t l_omiChipUnit = - l_omiTarget->getAttr<TARGETING::ATTR_CHIP_UNIT>(); + l_omiTarget->getAttr<ATTR_CHIP_UNIT>(); // Get the OMI position relative to other OMIs under its parent MC chiplet uint32_t l_omiPosRelativeToMc = l_omiChipUnit % OMI_PER_MC; @@ -136,8 +155,10 @@ errlHndl_t mmioSetup() // Calculated real address for this OMI is (BAR from MC attribute) + (currentOmiOffset) uint64_t l_calulatedRealAddr = l_omiBaseAddr + l_currentOmiOffset; - // Grab bar value from attribute to verify it matches our calculations - auto l_omiBarAttrVal = l_omiTarget->getAttr<TARGETING::ATTR_OMI_INBAND_BAR_BASE_ADDR_OFFSET>(); + // Grab bar value from attribute to verify it matches + // our calculations + auto l_omiBarAttrVal = l_omiTarget-> + getAttr<ATTR_OMI_INBAND_BAR_BASE_ADDR_OFFSET>(); if(l_omiBarAttrVal != l_calulatedRealAddr) { @@ -151,7 +172,7 @@ errlHndl_t mmioSetup() * @reasoncode MMIO::RC_BAR_OFFSET_MISMATCH * @userdata1 Calculated Bar Offset * @userdata2 Bar offset from attribute - * @devdesc mmioSetup> Mismatch between calculated map value + * @devdesc Mismatch between calculated map value * and what is in attribute xml * @custdesc Unexpected memory subsystem firmware error. 
*/ @@ -172,16 +193,21 @@ errlHndl_t mmioSetup() uint64_t l_currentOmiVirtAddr = l_virtAddr + l_currentOmiOffset; // set VM_ADDR the associated OCMB - TARGETING::TargetHandleList l_ocmbTargetList; + TargetHandleList l_ocmbTargetList; getChildAffinityTargets(l_ocmbTargetList, l_omiTarget, - TARGETING::CLASS_CHIP, TARGETING::TYPE_OCMB_CHIP); + CLASS_CHIP, TYPE_OCMB_CHIP); assert(l_ocmbTargetList.size() == 1 , "OCMB chips list found for a given OMI != 1 as expected"); - TRACFCOMP(g_trac_mmio, "Setting HUID 0x%.08X MMIO vm addr to be 0x%lX , real address is 0x%lX", TARGETING::get_huid(l_ocmbTargetList[0]), - l_currentOmiVirtAddr, l_calulatedRealAddr | MMIO_BASE ); + TRACFCOMP(g_trac_mmio, + "Setting HUID 0x%.08X MMIO vm addr to be 0x%lX, real" + " address is 0x%lX", + get_huid(l_ocmbTargetList[0]), + l_currentOmiVirtAddr, + l_calulatedRealAddr | MMIO_BASE ); - l_ocmbTargetList[0]->setAttr<TARGETING::ATTR_MMIO_VM_ADDR>(l_currentOmiVirtAddr); + l_ocmbTargetList[0]-> + setAttr<ATTR_MMIO_VM_ADDR>(l_currentOmiVirtAddr); } } } while(0); @@ -194,42 +220,372 @@ errlHndl_t mmioSetup() // Direct OCMB reads and writes to the device's memory mapped memory. 
DEVICE_REGISTER_ROUTE(DeviceFW::WILDCARD, DeviceFW::MMIO, - TARGETING::TYPE_OCMB_CHIP, + TYPE_OCMB_CHIP, ocmbMmioPerformOp); -errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, - TARGETING::TargetHandle_t i_target, - void* io_buffer, - size_t& io_buflen, - int64_t i_accessType, - va_list i_args) +/******************************************************************************* + * + * @brief Switch to using I2C instead of MMIO SCOMs for an OCMB + * + * @param[in] i_ocmbTarget Which OCMB to switch to using I2C + * + */ +void disableInbandScomsOcmb(const TargetHandle_t i_ocmbTarget) { - errlHndl_t l_err = nullptr; - uint64_t l_offset = va_arg(i_args, uint64_t); - uint64_t l_accessLimit = va_arg(i_args, uint64_t); + mutex_t* l_mutex = NULL; - TRACDCOMP(g_trac_mmio, ENTER_MRK"ocmbMmioPerformOp"); - TRACDCOMP(g_trac_mmio, INFO_MRK"op=%d, target=0x%.8X", - i_opType, TARGETING::get_huid(i_target)); - TRACDCOMP(g_trac_mmio, INFO_MRK"buffer=%p, length=%d, accessType=%ld", - io_buffer, io_buflen, i_accessType); - TRACDCOMP(g_trac_mmio, INFO_MRK"offset=0x%lX, accessLimit=%ld", - l_offset, l_accessLimit); + TRACFCOMP(g_trac_mmio, + "disableInbandScomsOcmb: switching to use I2C on OCMB 0x%08x", + get_huid(i_ocmbTarget)); - do + //don't mess with attributes without the mutex (just to be safe) + l_mutex = i_ocmbTarget->getHbMutexAttr<ATTR_IBSCOM_MUTEX>(); + mutex_lock(l_mutex); + + ScomSwitches l_switches = i_ocmbTarget->getAttr<ATTR_SCOM_SWITCHES>(); + l_switches.useInbandScom = 0; + l_switches.useI2cScom = 1; + + // Modify attribute + i_ocmbTarget->setAttr<ATTR_SCOM_SWITCHES>(l_switches); + mutex_unlock(l_mutex); +} + +/******************************************************************************* + * + * @brief Determine if we are on sub-channel A (OMI-0) or not. + * + * @param[in] Which OCMB target to query + * + * @return True if the OCMB target is on sub-channel A (OMI-0). False + * Otherwise. 
+ * + */ +bool isSubChannelA(const TargetHandle_t i_ocmbTarget) +{ + const auto l_parentOMI = getImmediateParentByAffinity(i_ocmbTarget); + return (l_parentOMI->getAttr<ATTR_REL_POS>() == 0); +} + +/******************************************************************************* + * + * @brief Adds default callouts to error log for when further isolation + * cannot be performed. + * + * @param[in] Error log to add callouts to. + * @param[in] OCMB target to callout + * + */ +void addDefaultCallouts(errlHndl_t i_err, + const TargetHandle_t i_ocmbTarget) +{ + // Add OCMB as high priority + i_err->addHwCallout(i_ocmbTarget, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_NULL); + + // Add OMI bus + i_err->addHwCallout(getImmediateParentByAffinity(i_ocmbTarget), + HWAS::SRCI_PRIORITY_MED, + HWAS::DECONFIG, + HWAS::GARD_NULL); + + // Add code as low priority callout + i_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_LOW); +} + +/******************************************************************************* + * + * @brief Determine if the OCMB detected a failure on a specific MMIO + * transaction to the specified OCMB target. + * + * @param[in] i_ocmbTarget Handle for the target OCMB chip. + * @param[in] i_va Virtual address of the transaction to check + * @param[in] i_accessLimit The byte range of the transaction + * @param[in] i_offset The offset from the base address of the OCMB chip + * @param[in] i_opType The operation type (read or write) + * @param[out] o_errorAddressMatches Set to true if the OCMB chip detected a + * failure on our transaction. + * @param[out] o_errorAddressIsZero Set to true if no error has been detected + * yet. + * @return nullptr on successful read of OCMB error status, non-null otherwise. 
+ * + */ +errlHndl_t checkOcmbError(const TargetHandle_t i_ocmbTarget, + const uint64_t i_va, + const uint64_t i_accessLimit, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + bool& o_errorAddressMatches, + bool& o_errorAddressIsZero) +{ + errlHndl_t l_err = nullptr; + const auto l_ocmbChipId = i_ocmbTarget->getAttr<TARGETING::ATTR_CHIP_ID>(); + switch(l_ocmbChipId) { - uint64_t l_addr = i_target->getAttr<TARGETING::ATTR_MMIO_VM_ADDR>(); + case POWER_CHIPID::EXPLORER_16: + case POWER_CHIPID::GEMINI_16: + l_err = MMIOEXP::checkExpError(i_ocmbTarget, + i_va, + i_accessLimit, + i_offset, + i_opType, + o_errorAddressMatches, + o_errorAddressIsZero); + break; - TRACDCOMP(g_trac_mmio, INFO_MRK"MMIO Op l_addr=0x%lX ", l_addr); + default: + // Should never get here, but just in case... + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkOcmbError: Unsupported chip ID[0x%08x] on OCMB[0x%08x]", + l_ocmbChipId, get_huid(i_ocmbTarget)); + /*@ + * @errortype + * @moduleid MMIO::MOD_CHECK_OCMB_ERROR + * @reasoncode MMIO::RC_UNSUPPORTED_CHIPID + * @userdata1 OCMB HUID + * @userdata2 OCMB chip ID + * @devdesc A MMIO operation was attempted + * on an unsupported OCMB chip. + * @custdesc Unexpected memory subsystem firmware error. + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + MMIO::MOD_CHECK_OCMB_ERROR, + MMIO::RC_UNSUPPORTED_CHIPID, + get_huid(i_ocmbTarget), + l_ocmbChipId, + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); + break; + } + return l_err; +} + +/******************************************************************************* + * + * @brief Collect additional failure data from the target OCMB chip and add + * appropriate FRU/Procedure callouts. + * + * @note Must call checkOcmbError to determine that a transaction failed before + * calling this function. + * + * @param[in] i_ocmbTarget Handle of OCMB to collect extra FFDC from + * @param[in] i_offset The offset of the transaction address + * on the OCMB chip. 
+ * @param[in] i_opType The operation type (read or write) + * @param[in] i_err The error log for adding callouts/FFDC + * + */ +void determineCallouts(const TargetHandle_t i_ocmbTarget, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + errlHndl_t i_err) +{ + bool l_fwFailure = false; + errlHndl_t l_err = nullptr; + + const auto l_ocmbChipId = i_ocmbTarget->getAttr<TARGETING::ATTR_CHIP_ID>(); + switch(l_ocmbChipId) + { + case POWER_CHIPID::EXPLORER_16: + case POWER_CHIPID::GEMINI_16: + l_err = MMIOEXP::determineExpCallouts(i_ocmbTarget, + i_offset, + i_opType, + i_err, + l_fwFailure); + break; + default: + // Should never get here, but just in case... + TRACFCOMP(g_trac_mmio, ERR_MRK + "determineCallouts: Unsupported chip ID[0x%08x] on OCMB[0x%08x]", + l_ocmbChipId, get_huid(i_ocmbTarget)); + /*@ + * @errortype + * @moduleid MMIO::MOD_DETERMINE_CALLOUTS + * @reasoncode MMIO::RC_UNSUPPORTED_CHIPID + * @userdata1 OCMB HUID + * @userdata2 OCMB chip ID + * @devdesc A MMIO operation was attempted + * on an unsupported OCMB chip. + * @custdesc Unexpected memory subsystem firmware error. + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + MMIO::MOD_DETERMINE_CALLOUTS, + MMIO::RC_UNSUPPORTED_CHIPID, + get_huid(i_ocmbTarget), + l_ocmbChipId, + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); + break; + } + if(l_err) + { + TRACFCOMP(g_trac_mmio, + "determineCallouts: Couldn't isolate failure on" + " OCMB[0x%08x]", + get_huid(i_ocmbTarget)); + + // This error is secondary to the actual error. 
Log as informational + // and add default callouts + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + l_err->plid(i_err->plid()); + ERRORLOG::errlCommit(l_err, MMIO_COMP_ID); + addDefaultCallouts(i_err, i_ocmbTarget); + } + else + { + if(l_fwFailure) + { + TRACFCOMP(g_trac_mmio, + "determineCallouts: firmware error detected on" + " OCMB[0x%08x]", + get_huid(i_ocmbTarget)); + + // Add HB code as high priority callout + i_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_HIGH); + } + else + { + TRACFCOMP(g_trac_mmio, + "determineCallouts: hardware error detected on" + " OCMB[0x%08x]", + get_huid(i_ocmbTarget)); + + // Add OCMB as high priority callout + i_err->addHwCallout(i_ocmbTarget, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_NULL); + } + } +} + +/******************************************************************************* + * + * @brief Checks for a channel failure + * + * @param[in] i_ocmbTarget The OCMB to check for a channel failure + * @param[out] o_checkstopExists true if channel failed, false otherwise. + * + * @return nullptr if we were able to read the status. Non-nullptr if there + * was a SCOM failure in reading status. + */ +errlHndl_t checkChannelCheckstop(const TargetHandle_t i_ocmbTarget, + bool& o_checkstopExists) +{ + bool l_checkstopExists = false; + uint64_t l_scom_data = 0; + uint64_t l_scom_mask = 0; + + auto l_err = getProcScom(i_ocmbTarget, + P9A_MCC_DSTLFIR, + l_scom_data); + if (l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkChannelCheckstop: getscom(P9A_MCC_DSTLFIR) failed" + " on OCMB[0x%08x]", get_huid(i_ocmbTarget)); + } + else + { + // Check for channel checkstop on our sub-channel + l_scom_mask = (isSubChannelA(i_ocmbTarget))? + (1ull << P9A_MC_DSTLFIR_SUBCHANNEL_A_FAIL_ACTION): + (1ull << P9A_MC_DSTLFIR_SUBCHANNEL_B_FAIL_ACTION); + if (l_scom_data & l_scom_mask) + { + // A channel checkstop has occurred. 
(our bus is down) + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkChannelCheckstop: there was a channel checkstop on" + " OCMB[0x%08x], P9A_MCC_DSTLFIR=0x%llX", + get_huid(i_ocmbTarget), l_scom_data); + l_checkstopExists = true; + + } + } + o_checkstopExists = l_checkstopExists; + return l_err; +} + +/******************************************************************************* + * + * @brief Validates input parameters and state for an OCMB MMIO operation + * + * @param[in] i_opType Operation type, see DeviceFW::OperationType + * in driverif.H + * @param[in] i_ocmbTarget inband scom target + * @param[in] i_buffer pointer to read/write buffer + * @param[in] i_buflen size of i_buffer (in bytes) + * @param[in] i_addr The base virtual address of the OCMB MMIO space + * @param[in] i_offset The offset of the config reg, scom reg, MSCC reg or + * SRAM to be accessed. + * @param[in/out] io_accessLimit The number of bytes to read/write per MMIO + * transaction. Will be set to i_buflen if + * io_accessLimit is zero. + * + * @return nullptr on success, failure otherwise. + */ +errlHndl_t validateOcmbMmioOp(DeviceFW::OperationType i_opType, + const TargetHandle_t i_ocmbTarget, + void* i_buffer, + size_t i_buflen, + const uint64_t i_addr, + const uint64_t i_offset, + uint64_t& io_accessLimit) +{ + errlHndl_t l_err = nullptr; + + do + { + // Check that this is a supported OCMB chip + const auto l_ocmbChipId = + i_ocmbTarget->getAttr<TARGETING::ATTR_CHIP_ID>(); + switch(l_ocmbChipId) + { + case POWER_CHIPID::EXPLORER_16: + case POWER_CHIPID::GEMINI_16: + break; + default: + TRACFCOMP(g_trac_mmio, ERR_MRK + "validateOcmbMmioOp: Unsupported chip ID[0x%08x] " + "on OCMB[0x%08x]", + l_ocmbChipId, get_huid(i_ocmbTarget)); + /*@ + * @errortype + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP + * @reasoncode MMIO::RC_UNSUPPORTED_CHIPID + * @userdata1 OCMB HUID + * @userdata2 OCMB chip ID + * @devdesc A MMIO operation was attempted + * on an unsupported OCMB chip. 
+ * @custdesc Unexpected memory subsystem firmware error. + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, + MMIO::RC_UNSUPPORTED_CHIPID, + get_huid(i_ocmbTarget), + l_ocmbChipId, + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); + break; + } + if(l_err) + { + break; + } - if (l_addr == 0) + if (i_addr == 0) { TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: MMIO has not been initialized!"); + "validateOcmbMmioOp: MMIO has not been initialized!"); /*@ * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP * @reasoncode MMIO::RC_INVALID_SETUP * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract 2GB @@ -237,35 +593,35 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> A MMIO operation was attempted + * @devdesc A MMIO operation was attempted * before MMIO was initialized. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, MMIO::RC_INVALID_SETUP, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), + get_huid(i_ocmbTarget), + (i_offset < (4 * GIGABYTE)) ? 
+ (i_offset) : + (i_offset - (2 * GIGABYTE))), TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), + (i_opType << 31) | io_accessLimit, + i_buflen), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); break; } - if (io_buffer == nullptr) + if (i_buffer == nullptr) { TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: buffer is invalid!"); + "validateOcmbMmioOp: buffer is invalid!"); /*@ * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP * @reasoncode MMIO::RC_INVALID_BUFFER * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract 2GB @@ -273,41 +629,41 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> Invalid data buffer for a MMIO + * @devdesc Invalid data buffer for a MMIO * operation. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, MMIO::RC_INVALID_BUFFER, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), + get_huid(i_ocmbTarget), + (i_offset < (4 * GIGABYTE)) ? 
+ (i_offset) : + (i_offset - (2 * GIGABYTE))), TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), + (i_opType << 31) | io_accessLimit, + i_buflen), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); break; } - switch (l_accessLimit) { + switch (io_accessLimit) { case 0: - l_accessLimit = io_buflen; // no access size restriction + io_accessLimit = i_buflen; // no access size restriction case 4: case 8: break; // expected values default: TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: accessLimit(%ld) should be 0, 4 or 8!!!", - l_accessLimit); + "validateOcmbMmioOp: accessLimit(%ld) should be 0, 4 or 8!!!", + io_accessLimit); /*@ * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP * @reasoncode MMIO::RC_INVALID_ACCESS_LIMIT * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract 2GB @@ -315,22 +671,22 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> Specified access limit was + * @devdesc Specified access limit was * invalid for a MMIO operation. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, MMIO::RC_INVALID_ACCESS_LIMIT, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), + get_huid(i_ocmbTarget), + (i_offset < (4 * GIGABYTE)) ? 
+ (i_offset) : + (i_offset - (2 * GIGABYTE))), TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), + (i_opType << 31) | io_accessLimit, + i_buflen), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); break; } @@ -340,16 +696,16 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, break; } - if (io_buflen < l_accessLimit) + if (i_buflen < io_accessLimit) { TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: buffer is too small for the" + "validateOcmbMmioOp: buffer is too small for the" " request, buflen=%d, accessLimit=%ld", - io_buflen, l_accessLimit); + i_buflen, io_accessLimit); /*@ * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP * @reasoncode MMIO::RC_INSUFFICIENT_BUFFER * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract 2GB @@ -357,38 +713,38 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> Data buffer too small for a + * @devdesc Data buffer too small for a * MMIO operation. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, MMIO::RC_INSUFFICIENT_BUFFER, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), + get_huid(i_ocmbTarget), + (i_offset < (4 * GIGABYTE)) ? 
+ (i_offset) : + (i_offset - (2 * GIGABYTE))), TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), + (i_opType << 31) | io_accessLimit, + i_buflen), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); break; } - if (io_buflen % l_accessLimit) + if (i_buflen % io_accessLimit) { TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: buffer length must be a" + "validateOcmbMmioOp: buffer length must be a" " multiple of the access limit," " buflen=%d, accessLimit=%ld", - io_buflen, l_accessLimit); + i_buflen, io_accessLimit); /*@ * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP * @reasoncode MMIO::RC_INCORRECT_BUFFER_LENGTH * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract 2GB @@ -396,38 +752,37 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> Buffer length not a multiple - * of access limit. + * @devdesc Buffer length not a multiple of access limit. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, MMIO::RC_INCORRECT_BUFFER_LENGTH, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), + get_huid(i_ocmbTarget), + (i_offset < (4 * GIGABYTE)) ? 
+ (i_offset) : + (i_offset - (2 * GIGABYTE))), TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), + (i_opType << 31) | io_accessLimit, + i_buflen), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); break; } - if (!(((l_offset >= 0) && (l_offset < (2 * GIGABYTE))) || - ((l_offset >= (4 * GIGABYTE)) && (l_offset < (6 * GIGABYTE))))) + if (!(((i_offset >= 0) && (i_offset < (2 * GIGABYTE))) || + ((i_offset >= (4 * GIGABYTE)) && (i_offset < (6 * GIGABYTE))))) { TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: offset(0x%lX) must be" + "validateOcmbMmioOp: offset(0x%lX) must be" " either 0-2G or 4G-6G!", - l_offset); + i_offset); /*@ * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP * @reasoncode MMIO::RC_INVALID_OFFSET * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract 2GB @@ -435,38 +790,38 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> Invalid offset, requested + * @devdesc Invalid offset, requested * address was out of range for a MMIO operation. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, MMIO::RC_INVALID_OFFSET, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), + get_huid(i_ocmbTarget), + (i_offset < (4 * GIGABYTE)) ? 
+ (i_offset) : + (i_offset - (2 * GIGABYTE))), TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), + (i_opType << 31) | io_accessLimit, + i_buflen), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); break; } - if ( ((l_accessLimit == 4) || (l_accessLimit == 8)) && - ((l_offset % l_accessLimit) != 0) ) + if ( ((io_accessLimit == 4) || (io_accessLimit == 8)) && + ((i_offset % io_accessLimit) != 0) ) { TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: offset must be aligned with access limit," + "validateOcmbMmioOp: offset must be aligned with access limit," " offset=0x%lX, accessLimit=%ld", - l_offset, l_accessLimit); + i_offset, io_accessLimit); /*@ * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @moduleid MMIO::MOD_VALIDATE_OCMB_MMIO_OP * @reasoncode MMIO::RC_INVALID_OFFSET_ALIGNMENT * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract 2GB @@ -474,73 +829,139 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> Requested MMIO address was not + * @devdesc Requested MMIO address was not * aligned properly for the associated device. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_VALIDATE_OCMB_MMIO_OP, MMIO::RC_INVALID_OFFSET_ALIGNMENT, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), + get_huid(i_ocmbTarget), + (i_offset < (4 * GIGABYTE)) ? 
+ (i_offset) : + (i_offset - (2 * GIGABYTE))), TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), + (i_opType << 31) | io_accessLimit, + i_buflen), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); break; } + }while(0); + return l_err; +} + + +/******************************************************************************* + * + * See comments in header file + * + */ +errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, + TargetHandle_t i_ocmbTarget, + void* io_buffer, + size_t& io_buflen, + int64_t i_accessType, + va_list i_args) +{ + errlHndl_t l_err = nullptr; + uint64_t l_offset = va_arg(i_args, uint64_t); + uint64_t l_accessLimit = va_arg(i_args, uint64_t); + + TRACDCOMP(g_trac_mmio, ENTER_MRK"ocmbMmioPerformOp"); + TRACDCOMP(g_trac_mmio, INFO_MRK"op=%d, target=0x%.8X", + i_opType, get_huid(i_ocmbTarget)); + TRACDCOMP(g_trac_mmio, INFO_MRK"buffer=%p, length=%d, accessType=%ld", + io_buffer, io_buflen, i_accessType); + TRACDCOMP(g_trac_mmio, INFO_MRK"offset=0x%lX, accessLimit=%ld", + l_offset, l_accessLimit); + + do + { + uint64_t l_addr = i_ocmbTarget->getAttr<ATTR_MMIO_VM_ADDR>(); + + TRACDCOMP(g_trac_mmio, INFO_MRK"MMIO Op l_addr=0x%lX ", l_addr); - // TODO RTC 201493 - Remove these consts once HW group has defined them. 
- static const uint8_t P9A_MC_DSTLFIR_SUBCHANNEL_A_FAIL_ACTION = 20; - static const uint8_t P9A_MC_DSTLFIR_SUBCHANNEL_B_FAIL_ACTION = 21; + // Validate parameters for MMIO operation + l_err = validateOcmbMmioOp(i_opType, + i_ocmbTarget, + io_buffer, + io_buflen, + l_addr, + l_offset, + l_accessLimit); + if(l_err) + { + break; + } // read or write io_buflen bytes, l_accessLimit bytes at a time - uint8_t *mm_ptr = reinterpret_cast<uint8_t *>(l_addr + l_offset); - uint8_t *io_ptr = reinterpret_cast<uint8_t *>(io_buffer); - size_t bytes_read_or_written = 0; - for (size_t i = 0;i < io_buflen;i += l_accessLimit) + uint8_t* l_mmPtr = reinterpret_cast<uint8_t *>(l_addr + l_offset); + uint8_t* l_ioPtr = reinterpret_cast<uint8_t *>(io_buffer); + size_t l_bytesCopied = 0; + for (;l_bytesCopied < io_buflen; l_bytesCopied += l_accessLimit) { if (i_opType == DeviceFW::READ) { - mmio_memcpy(io_ptr + i, mm_ptr + i, l_accessLimit); + // Perform requested MMIO read + mmio_memcpy(l_ioPtr + l_bytesCopied, + l_mmPtr + l_bytesCopied, + l_accessLimit); eieio(); - if (!memcmp(io_ptr + i, + + // If there was a UE detected by the processor, a Load UE + // exception will be raised. Kernel code will detect + // that the exception occurred during an OCMB read and + // will write a unique pattern, MMIO_OCMB_UE_DETECTED, into + // the read buffer so that we can quickly know that the MMIO + // read failed. + if (memcmp(l_ioPtr + l_bytesCopied, &MMIO_OCMB_UE_DETECTED, sizeof(MMIO_OCMB_UE_DETECTED))) { - uint64_t scom_data = 0; - uint64_t scom_mask = 0; + //No read failure detected. Keep going. + continue; + } - TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: unable to complete" - " MMIO read, SUE detected"); + //MMIO Read failed! 
+ TRACFCOMP(g_trac_mmio, ERR_MRK + "ocmbMmioPerformOp: unable to complete" + " MMIO read of offset 0x%08x from OCMB 0x%08x", + l_offset, get_huid(i_ocmbTarget)); + + // Check for channel checkstops (this reads a processor reg) + bool l_checkstopExists = false; + l_err = checkChannelCheckstop(i_ocmbTarget, l_checkstopExists); + if(l_err) + { + // Couldn't deterimine if checkstop exists. + break; + } + if(l_checkstopExists) + { /*@ * @errortype * @moduleid MMIO::MOD_MMIO_PERFORM_OP - * @reasoncode MMIO::RC_BAD_MMIO_READ + * @reasoncode MMIO::RC_MMIO_CHAN_CHECKSTOP * @userdata1[0:31] Target huid * @userdata1[32:63] Data Offset, if >= 4GB then subtract * 2GB (allows offsets to fit in 32 bits) * @userdata2[0:0] Operation Type * @userdata2[28:31] Access Limit * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> MMIO read of an OCMB - * failed. - * @custdesc Unexpected memory subsystem firmware - * error. + * @devdesc OCMB MMIO read failed due to + * channel checkstop + * @custdesc Unexpected memory subsystem error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, + MMIO::MOD_MMIO_CHAN_CHECKSTOP, MMIO::RC_BAD_MMIO_READ, TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), + get_huid(i_ocmbTarget), (l_offset < (4 * GIGABYTE)) ? 
(l_offset) : (l_offset - (2 * GIGABYTE))), @@ -548,198 +969,282 @@ errlHndl_t ocmbMmioPerformOp(DeviceFW::OperationType i_opType, (i_opType << 31) | l_accessLimit, io_buflen), ERRORLOG::ErrlEntry::NO_SW_CALLOUT); - // add OCMB to error log - l_err->addHwCallout(i_target, - HWAS::SRCI_PRIORITY_HIGH, - HWAS::DECONFIG, - HWAS::GARD_NULL); - l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, - HWAS::SRCI_PRIORITY_LOW); - const auto plid = l_err->plid(); - - auto l_err2 = getProcScom(i_target, - P9A_MCC_USTLFIR, - scom_data); - if (l_err2) - { - l_err2->plid(plid); - errlCommit(l_err2, MMIO_COMP_ID); - } - else - { - scom_mask = (1ull << P9A_MC_USTLFIR_CHANA_BAD_DATA) | - (1ull << P9A_MC_USTLFIR_CHANB_BAD_DATA); - if (scom_data & scom_mask) - { - // TODO RTC 201588 - Error checking on Explorer side - TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: there was an error on" - " the Explorer side, P9A_MCC_USTLFIR=0x%lX", - scom_data); - - // Clear FIR bits - scom_data &= ~scom_mask; - l_err2 = setProcScom(i_target, - P9A_MCC_USTLFIR, - scom_data); - if (l_err2) - { - l_err2->plid(plid); - errlCommit(l_err2, MMIO_COMP_ID); - } - } - } - l_err2 = getProcScom(i_target, - P9A_MCC_DSTLFIR, - scom_data); - if (l_err2) - { - l_err2->plid(plid); - errlCommit(l_err2, MMIO_COMP_ID); - } - else + addDefaultCallouts(l_err, i_ocmbTarget); + + // Switch to I2C to allow collection of registers on + // OCMB. + disableInbandScomsOcmb(i_ocmbTarget); + + // TODO RTC 201778 - Channel fail handling for Explorer + // dump some registers to the error log here? + + // Look for a better PRD error + // + // TODO RTC 92971 + // There is a potential deadlock if we call PRD here since + // we could recursively call PRD and they are locking a + // mutex. Skip this call for now. 
+ // + //errlHndl_t l_prd_err = ATTN::checkForIplAttentions(); + errlHndl_t l_prd_err = NULL; + if(l_prd_err) { - scom_mask = - (1ull << P9A_MC_DSTLFIR_SUBCHANNEL_A_FAIL_ACTION) | - (1ull << P9A_MC_DSTLFIR_SUBCHANNEL_B_FAIL_ACTION); - if (scom_data & scom_mask) - { - // A channel checkstop has occurred. - // TODO RTC 201778 - Channel Fail Handling for - // Explorer - TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: there was an error on" - " the Explorer channel, P9A_MCC_DSTLFIR=0x%lX", - scom_data); - } + TRACFCOMP(g_trac_mmio, + ERR_MRK"Error from checkForIplAttentions: " + "PLID=%X", + l_prd_err->plid()); + + //connect up the plids + l_err->plid(l_prd_err->plid()); + + //commit my log as info because PRD's log is better + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + ERRORLOG::errlCommit(l_err, MMIO_COMP_ID); + l_err = l_prd_err; } break; } + + /*@ + * @errortype + * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @reasoncode MMIO::RC_BAD_MMIO_READ + * @userdata1[0:31] Target huid + * @userdata1[32:63] Data Offset, if >= 4GB then subtract + * 2GB (allows offsets to fit in 32 bits) + * @userdata2[0:0] Operation Type + * @userdata2[28:31] Access Limit + * @userdata2[32:63] Buffer Length + * @devdesc OCMB MMIO read failed + * @custdesc Unexpected memory subsystem firmware + * error. + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + MMIO::MOD_MMIO_PERFORM_OP, + MMIO::RC_BAD_MMIO_READ, + TWO_UINT32_TO_UINT64( + get_huid(i_ocmbTarget), + (l_offset < (4 * GIGABYTE)) ? + (l_offset) : + (l_offset - (2 * GIGABYTE))), + TWO_UINT32_TO_UINT64( + (i_opType << 31) | l_accessLimit, + io_buflen), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT); + + // NOTE: Explorer error regs cannot be cleared without resetting + // the chip. Error regs may contain failure data from + // previous write transaction. + // + // Check if OCMB has failure data for this transaction. 
+ bool l_errorAddressMatches = false; + bool l_errorAddressIsZero = false; + auto l_err2 = checkOcmbError( + i_ocmbTarget, + reinterpret_cast<uint64_t>(l_mmPtr + + l_bytesCopied), + l_accessLimit, + l_offset, + i_opType, + l_errorAddressMatches, + l_errorAddressIsZero); + if (l_err2) + { + // Failed to read ocmb status register after + // we just determined that there was not + // a channel checkstop? Commit this error + // as informational and add default callouts + // to l_err. + l_err2->plid(l_err->plid()); + ERRORLOG::errlCommit(l_err2, MMIO_COMP_ID); + addDefaultCallouts(l_err, i_ocmbTarget); + break; + } + else if(l_errorAddressMatches) + { + // Read additional OCMB regs to determine if this was + // a HW or SW error. + determineCallouts(i_ocmbTarget, l_offset, i_opType, l_err); + break; + } + else if(l_errorAddressIsZero) + { + // P9A disagrees with OCMB? + TRACFCOMP(g_trac_mmio, + "ocmbMmioPerformOp(read): No Error found on OCMB??" + " 0x%08x", get_huid(i_ocmbTarget)); + addDefaultCallouts(l_err, i_ocmbTarget); + break; + } + + // Address does not match ours and is not zero. + // This was probably caused by an MMIO write failure + // doing an MMIO read to detect if the MMIO write + // was successful or not. + TRACFCOMP(g_trac_mmio, + "ocmbMmioPerformOp(read): Previous error detected on" + " OCMB 0x%08x", get_huid(i_ocmbTarget)); + break; } - else if (i_opType == DeviceFW::WRITE) + else // i_opType == DeviceFW::WRITE { - mmio_memcpy(mm_ptr + i, io_ptr + i, l_accessLimit); + // Perform the MMIO write + mmio_memcpy(l_mmPtr + l_bytesCopied, + l_ioPtr + l_bytesCopied, + l_accessLimit); eieio(); - // TODO RTC 201901 - find a better OCMB register to read, should - // be able to optimize error handling. - - // do a read on the OCMB after writing to it, since writes and - // reads are sequential, the read won't complete until after the - // write. 
- uint64_t scom_addr = (4 * GIGABYTE) + 4; // RTC 201901 - uint8_t l_ocmbReg[8] = {0}; - - mmio_memcpy(l_ocmbReg, mm_ptr + scom_addr, sizeof(l_ocmbReg)); - eieio(); - if (!memcmp(io_ptr + i, - &MMIO_OCMB_UE_DETECTED, - sizeof(MMIO_OCMB_UE_DETECTED))) + // MMIO write failures will not cause an exception + // to be raised on the host processor. Instead, code + // needs to check a register on the OCMB to determine + // if a specific write failed. + bool l_errorAddressMatches = false; + bool l_errorAddressIsZero = false; + l_err = checkOcmbError( + i_ocmbTarget, + reinterpret_cast<uint64_t>(l_mmPtr + + l_bytesCopied), + l_accessLimit, + l_offset, + i_opType, + l_errorAddressMatches, + l_errorAddressIsZero); + + // Check that we were able to read the error register + // and that it doesn't contain our address. + if(!l_err && !l_errorAddressMatches) { - uint64_t scom_data = 0; - uint64_t scom_mask = 0; + // No errors detected. Keep going. + continue; + } - TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: unable to complete MMIO" - " write, SUE detected"); + // At this point, we know that the write or status read failed. + // Go ahead and create a basic MMIO Write error log. + TRACFCOMP(g_trac_mmio, ERR_MRK + "ocmbMmioPerformOp: unable to complete" + " MMIO write to offset 0x%08x on OCMB 0x%08x", + l_offset, get_huid(i_ocmbTarget)); - /*@ - * @errortype - * @moduleid MMIO::MOD_MMIO_PERFORM_OP - * @reasoncode MMIO::RC_BAD_MMIO_WRITE - * @userdata1[0:31] Target huid - * @userdata1[32:63] Data Offset, if >= 4GB then subtract - * 2GB (allows offsets to fit in 32 bits) - * @userdata2[0:0] Operation Type - * @userdata2[28:31] Access Limit - * @userdata2[32:63] Buffer Length - * @devdesc mmioPerformOp> MMIO write of an OCMB - * failed. - * @custdesc Unexpected memory subsystem firmware - * error. 
- */ - l_err = new ERRORLOG::ErrlEntry( - ERRORLOG::ERRL_SEV_UNRECOVERABLE, - MMIO::MOD_MMIO_PERFORM_OP, - MMIO::RC_BAD_MMIO_WRITE, - TWO_UINT32_TO_UINT64( - i_target->getAttr<TARGETING::ATTR_HUID>(), - (l_offset < (4 * GIGABYTE)) ? - (l_offset) : - (l_offset - (2 * GIGABYTE))), - TWO_UINT32_TO_UINT64( - (i_opType << 31) | l_accessLimit, - io_buflen), - ERRORLOG::ErrlEntry::NO_SW_CALLOUT); - // add OCMB to error log - l_err->addHwCallout(i_target, - HWAS::SRCI_PRIORITY_HIGH, - HWAS::DECONFIG, - HWAS::GARD_NULL); - l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, - HWAS::SRCI_PRIORITY_LOW); - const auto plid = l_err->plid(); - - auto l_err2 = getProcScom(i_target, - P9A_MCC_DSTLFIR, - scom_data); - if (l_err2) + /*@ + * @errortype + * @moduleid MMIO::MOD_MMIO_PERFORM_OP + * @reasoncode MMIO::RC_BAD_MMIO_WRITE + * @userdata1[0:31] Target huid + * @userdata1[32:63] Data Offset, if >= 4GB then subtract + * 2GB (allows offsets to fit in 32 bits) + * @userdata2[0:0] Operation Type + * @userdata2[28:31] Access Limit + * @userdata2[32:63] Buffer Length + * @devdesc OCMB MMIO write failed + * @custdesc Unexpected memory subsystem firmware + * error. + */ + auto l_writeErr = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + MMIO::MOD_MMIO_PERFORM_OP, + MMIO::RC_BAD_MMIO_WRITE, + TWO_UINT32_TO_UINT64( + get_huid(i_ocmbTarget), + (l_offset < (4 * GIGABYTE)) ? + (l_offset) : + (l_offset - (2 * GIGABYTE))), + TWO_UINT32_TO_UINT64( + (i_opType << 31) | l_accessLimit, + io_buflen), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT); + + // Check if the register read failed + if(l_err) + { + // We were not able to read the error register on the + // OCMB. The most likely scenario here is that there + // was a HW failure (possibly a channel checkstop). + // + // NOTE: If we only logged this error as-is and no + // other error, we wouldn't know that the read was + // a result of a write. Instead, log both errors + // and set the PLID's to be the same. 
+ TRACFCOMP(g_trac_mmio, + "ocmbMmioPerformOp(write): Fail to read status on" + " OCMB 0x%08x", get_huid(i_ocmbTarget)); + l_writeErr->plid(l_err->plid()); + + // Set severity of write error to match the read + // error if there is a channel checkstop. + bool l_checkstopExists = false; + errlHndl_t l_xstopErr = nullptr; + l_xstopErr = checkChannelCheckstop(i_ocmbTarget, + l_checkstopExists); + if(l_xstopErr) { - l_err2->plid(plid); - errlCommit(l_err2, MMIO_COMP_ID); + // Couldn't deterimine if checkstop exists. + // Commit the xstop error and assume no checkstop. + l_xstopErr->collectTrace(MMIO_COMP_NAME); + ERRORLOG::errlCommit(l_xstopErr, MMIO_COMP_ID); } - else + if(l_checkstopExists) { - scom_mask = - (1ull << P9A_MC_DSTLFIR_SUBCHANNEL_A_FAIL_ACTION) | - (1ull << P9A_MC_DSTLFIR_SUBCHANNEL_B_FAIL_ACTION); - if (scom_data & scom_mask) - { - // A channel checkstop has occurred. - // TODO RTC 201778 - Channel Fail Handling for - // Explorer - TRACFCOMP(g_trac_mmio, ERR_MRK - "ocmbMmioPerformOp: there was an error on" - " the Explorer channel, P9A_MCC_DSTLFIR=0x%lX", - scom_data); - } + l_writeErr->setSev(l_err->sev()); } - + ERRORLOG::errlCommit(l_err, MMIO_COMP_ID); + l_err = l_writeErr; break; } - } - bytes_read_or_written += l_accessLimit; - } + l_err = l_writeErr; + l_writeErr = nullptr; - io_buflen = bytes_read_or_written; + // At this point, we were able to read the error register + // and determined that it matched the address of our + // transaction. No need to check for a channel checkstop + // on the write operation since we already did that in the + // read path when we tried to read the OCMB status register. + + // Read additional OCMB regs to determine if this was + // a HW or SW error. 
+ determineCallouts(i_ocmbTarget, l_offset, i_opType, l_err); + break; + } // end of write block + + } // end of for loop + + io_buflen = l_bytesCopied; } while(0); if (l_err) { + // Switch over to using I2C to prevent further MMIO access + // to this OCMB (error regs cannot be cleared on Explorer). + disableInbandScomsOcmb(i_ocmbTarget); + l_err->collectTrace(MMIO_COMP_NAME); } - TRACDCOMP(g_trac_mmio, EXIT_MRK"mmioPerformOp"); + TRACDCOMP(g_trac_mmio, EXIT_MRK"ocmbMmioPerformOp"); return l_err; } +/******************************************************************************* + * + * @brief Finds the processor connected to the target OCMB chip. + * + */ static -TARGETING::TargetHandle_t getParentProc(TARGETING::TargetHandle_t i_target) +TargetHandle_t getParentProc( + const TargetHandle_t i_ocmbTarget) { - TARGETING::TargetHandle_t proc = nullptr; - TARGETING::TargetHandleList list; - TARGETING::PredicateCTM pred(TARGETING::CLASS_CHIP, - TARGETING::TYPE_PROC); - - TARGETING::targetService().getAssociated( - list, - i_target, - TARGETING::TargetService::PARENT_BY_AFFINITY, - TARGETING::TargetService::ALL, + TargetHandle_t proc = nullptr; + TargetHandleList list; + PredicateCTM pred(CLASS_CHIP, TYPE_PROC); + + targetService().getAssociated( list, + i_ocmbTarget, + TargetService::PARENT_BY_AFFINITY, + TargetService::ALL, &pred); if (list.size() == 1) @@ -750,19 +1255,25 @@ TARGETING::TargetHandle_t getParentProc(TARGETING::TargetHandle_t i_target) return proc; } +/******************************************************************************* + * + * @brief Reads a scom register on the processor connected to the target OCMB + * chip. 
+ * + */ static -errlHndl_t getProcScom(TARGETING::TargetHandle_t i_target, +errlHndl_t getProcScom(const TargetHandle_t i_ocmbTarget, uint64_t i_scomAddr, uint64_t &o_scomData) { errlHndl_t l_err = nullptr; - auto proc = getParentProc(i_target); + auto proc = getParentProc(i_ocmbTarget); if (proc == nullptr) { TRACFCOMP(g_trac_mmio, ERR_MRK "getProcScom: Unable to find parent processor for target(0x%X)", - i_target->getAttr<TARGETING::ATTR_HUID>()); + get_huid(i_ocmbTarget)); /*@ * @errortype @@ -770,14 +1281,14 @@ errlHndl_t getProcScom(TARGETING::TargetHandle_t i_target, * @reasoncode MMIO::RC_PROC_NOT_FOUND * @userdata1 Target huid * @userdata2 SCOM address - * @devdesc getProcScom> Unable to find parent processor for target. + * @devdesc Unable to find parent processor for target. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, MMIO::MOD_MMIO_GET_PROC_SCOM, MMIO::RC_PROC_NOT_FOUND, - i_target->getAttr<TARGETING::ATTR_HUID>(), + get_huid(i_ocmbTarget), i_scomAddr, ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); } @@ -794,19 +1305,26 @@ errlHndl_t getProcScom(TARGETING::TargetHandle_t i_target, return l_err; } -static -errlHndl_t setProcScom(TARGETING::TargetHandle_t i_target, +/******************************************************************************* + * + * @brief Writes a scom register on the processor connected to the target OCMB + * chip. + * + */ +// NOTE: removed static qualifier to prevent compiler from complaining about +// the function not being used. 
+errlHndl_t setProcScom(const TargetHandle_t i_ocmbTarget, uint64_t i_scomAddr, uint64_t i_scomData) { errlHndl_t l_err = nullptr; - auto proc = getParentProc(i_target); + auto proc = getParentProc(i_ocmbTarget); if (proc == nullptr) { TRACFCOMP(g_trac_mmio, ERR_MRK "setProcScom: Unable to find parent processor for target(0x%X)", - i_target->getAttr<TARGETING::ATTR_HUID>()); + get_huid(i_ocmbTarget)); /*@ * @errortype @@ -814,14 +1332,14 @@ errlHndl_t setProcScom(TARGETING::TargetHandle_t i_target, * @reasoncode MMIO::RC_PROC_NOT_FOUND * @userdata1 Target huid * @userdata2 SCOM address - * @devdesc setProcScom> Unable to find parent processor for target. + * @devdesc Unable to find parent processor for target. * @custdesc Unexpected memory subsystem firmware error. */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, MMIO::MOD_MMIO_SET_PROC_SCOM, MMIO::RC_PROC_NOT_FOUND, - i_target->getAttr<TARGETING::ATTR_HUID>(), + get_huid(i_ocmbTarget), i_scomAddr, ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); } @@ -838,6 +1356,13 @@ errlHndl_t setProcScom(TARGETING::TargetHandle_t i_target, return l_err; } + +/******************************************************************************* + * + * @brief Copies len bytes of data from location pointed to by vsrc to location + * pointed to by vdest. + * + */ static void *mmio_memcpy(void *vdest, const void *vsrc, size_t len) { diff --git a/src/usr/mmio/mmio_explorer.C b/src/usr/mmio/mmio_explorer.C new file mode 100644 index 000000000..b86a7b88b --- /dev/null +++ b/src/usr/mmio/mmio_explorer.C @@ -0,0 +1,474 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/mmio/mmio_explorer.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. 
*/ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +#include <devicefw/driverif.H> +#include <errl/errlentry.H> +#include <errl/errlmanager.H> +#include <errl/errludtarget.H> +#include <errl/errludlogregister.H> +#include <explorer_scom_addresses.H> +#include <exp_inband.H> +#include <mmio/mmio_reasoncodes.H> + +// Trace definition +extern trace_desc_t* g_trac_mmio; + +using namespace TARGETING; + +namespace MMIOEXP +{ + +#define MMIOEXP_SCOM2OFFSET(_SCOM_ADDR) \ + (mss::exp::ib::EXPLR_IB_MMIO_OFFSET | (_SCOM_ADDR << 3)) + +/** + * @brief Possible Open CAPI response codes for config operations + */ +enum +{ + OCAPI_RETRY_REQUEST = 0x2, + OCAPI_DATA_ERROR = 0x8, + OCAPI_UNSUPPORTED_OP_LENGTH = 0x9, + OCAPI_BAD_ADDRESS = 0xB, + OCAPI_FAILED = 0xE, +}; + +/** + * @brief Possible PCB error codes for non-config operations + */ +enum +{ + PCB_OK = 0x0, + PCB_INVALID_ADDRESS = 0x4, + PCB_PARITY_ERROR = 0x6, + PCB_TIMEOUT = 0x7, +}; + +/** + * @brief bit-field definitions for MCFGERR register + */ +typedef union mcfgerrReg +{ + struct + { + uint64_t reserved :16; + uint64_t resp_code :4; + uint64_t bdi :1; + uint64_t error_type :3; + uint64_t device :5; + uint64_t function :3; + uint64_t dev_func_mismatch :1; + uint64_t detect_bad_op :1; + uint64_t tbit_is_1 :1; + uint64_t data_is_bad :1; + uint64_t pl_is_invalid :1; + uint64_t bad_op_or_align :1; + uint64_t addr_no_implemented:1; + 
uint64_t rdata_vld :1; + uint64_t tbit :1; + uint64_t plen :3; + uint64_t portnun :2; + uint64_t dl :2; + uint64_t capptag :16; + }; + uint64_t word64; +}mcfgerrReg_t; + +/** + * @brief bit-field definitions for GIF2PCB_ERROR register + */ +typedef union gif2pcbErrorReg +{ + struct + { + uint64_t parity_error_rsp_info :1; + uint64_t parity_error_rsp_data_0 :1; + uint64_t parity_error_rsp_data_1 :1; + uint64_t parity_error_rsp_data_2 :1; + uint64_t parity_error_rsp_data_3 :1; + uint64_t timeout_error :1; + uint64_t int_addr_access_error :1; + uint64_t invalid_access :1; + uint64_t pcb_err_code :3; + uint64_t axi_read_addr_parity_error :1; + uint64_t axi_write_addr_parity_error :1; + uint64_t axi_write_data_parity_error_31_24 :1; + uint64_t axi_write_data_parity_error_23_16 :1; + uint64_t axi_write_data_parity_error_15_8 :1; + uint64_t axi_write_data_parity_error_7_0 :1; + uint64_t pib2gif_parity_error :1; + uint64_t reserved :46; + }; + struct + { + uint64_t used_bits :18; + uint64_t unused_bits :46; + }; + uint64_t word64; +}gif2pcbErrorReg_t; + +/** + * @brief bit-field definitions for PIB2GIF_ERROR register + */ +typedef union pib2gifErrorReg +{ + struct + { + uint64_t parity_error_req_data_0:1; + uint64_t parity_error_req_data_1:1; + uint64_t parity_error_req_data_2:1; + uint64_t parity_error_req_data_3:1; + uint64_t parity_error_req_addr_0:1; + uint64_t parity_error_req_addr_1:1; + uint64_t parity_error_req_ctrl:1; + uint64_t timeout_error:1; + uint64_t int_addr_access_error:1; + uint64_t parity_error_on_fsm:1; + uint64_t parity_error_on_reg0:1; + uint64_t parity_error_on_reg1:1; + uint64_t parity_error_on_reg2:1; + uint64_t parity_error_on_reg3:1; + uint64_t parity_error_on_reg4:1; + uint64_t parity_error_on_reg5:1; + uint64_t invalid_address_error:1; + uint64_t reserved1:15; + uint64_t gif2pcb_error:18; + uint64_t reserved2:14; + }; + uint64_t word64; +}pib2gifErrorReg_t; + +// Explorer MMIO addresses only have 35 bits +constexpr uint64_t MASK_35BITS = 
0x7FFFFFFFFull; + + +/******************************************************************************* + * + * See header file for comments + */ +errlHndl_t checkExpError(const TargetHandle_t i_expTarget, + const uint64_t i_va, + const uint64_t i_accessLimit, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + bool& o_errorAddressMatches, + bool& o_errorAddressIsZero) +{ + errlHndl_t l_err = nullptr; + uint64_t l_errAddr = 0; + bool l_errorAddressMatches = false; + bool l_errorAddressIsZero = false; + const char* l_regStr = nullptr; + + // NOTE: mmio_memcpy could be doing multiple transactions. This means + // we need to test the explorer error address register against a + // range of values instead of a single value. + // NOTE: Explorer only uses the low 35 bits of the address for MMIO access + const uint64_t l_mmioAddr35Lo = i_va & MASK_35BITS; + const uint64_t l_mmioAddr35Hi = (i_va + i_accessLimit) & MASK_35BITS; + + do + { + // For access to CONFIG space, the MCFGERRA scom register + // contains the first failing address. + if(i_offset < mss::exp::ib::EXPLR_IB_MMIO_OFFSET) + { + auto l_reqSize = sizeof(l_errAddr); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_errAddr, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERRA)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkExpError: getscom(MCFGERRA) failed." + " huid[0x%08x]", get_huid(i_expTarget)); + break; + } + l_regStr = "MCFGERRA"; + } + // Otherwise, we are accessing a non-config address and, if there + // is a failure, the MMIO address will show up in the lower 35 bits of + // the MMIOERR register + else + { + // If the transaction was a read to this error register then + // we already know that it failed. Don't keep trying to + // read it or we could end up in a recursive loop. 
+ if((i_opType == DeviceFW::READ) && + (i_offset == MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MMIOERR))) + { + break; + } + auto l_reqSize = sizeof(l_errAddr); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_errAddr, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MMIOERR)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkExpError: getscom(MMIOERR) failed." + " huid[0x%08x]", get_huid(i_expTarget)); + break; + } + l_regStr = "MMIOERR"; + } + + // Check if error address from explorer is zero, meaning that + // explorer did not detect an error. + if(l_errAddr == 0) + { + l_errorAddressIsZero = true; + } + + // Check if 35-bit error address is outside our transaction + // access range + const uint64_t l_errAddr35 = l_errAddr & MASK_35BITS; + if((l_errAddr35 < l_mmioAddr35Lo) || + (l_errAddr35 >= l_mmioAddr35Hi)) + { + TRACDCOMP(g_trac_mmio, + "checkExpError: %s: 0x%09llx is not between 0x%09llx and" + " 0x%09llx on huid[0x%08x]", + l_regStr, l_errAddr, l_mmioAddr35Lo, + l_mmioAddr35Hi, get_huid(i_expTarget)); + // Error address is outside our transaction range so this error + // was not caused by our transaction. + break; + } + + TRACFCOMP(g_trac_mmio, ERR_MRK + "checkExpError: %s: 0x%09llx is between 0x%09llx and" + " 0x%09llx on huid[0x%08x]", + l_regStr, l_errAddr35, l_mmioAddr35Lo, + l_mmioAddr35Hi, get_huid(i_expTarget)); + l_errorAddressMatches = true; + + // NOTE: These registers cannot be cleared without resetting the chip. 
+ + }while(0); + + o_errorAddressMatches = l_errorAddressMatches; + o_errorAddressIsZero = l_errorAddressIsZero; + return l_err; +} + +/******************************************************************************* + * + * See header file for comments + */ +errlHndl_t determineExpCallouts(const TargetHandle_t i_expTarget, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + errlHndl_t i_err, + bool& o_fwFailure) +{ + bool l_fwFailure = false; //default to a hw failure + errlHndl_t l_err = nullptr; + size_t l_reqSize = 0; + ERRORLOG::ErrlUserDetailsLogRegister l_regDump(i_expTarget); + + do + { + // If the transaction was a read to any of these error registers, + // that we're about to read then we know that it already failed. + // Don't keep trying to read it or we could end up in a recursive + // loop. + if(i_opType == DeviceFW::READ) + { + switch(i_offset) + { + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MCFGERR): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MCFGERRA): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MMIOERR): + case MMIOEXP_SCOM2OFFSET(EXPLR_TP_MB_UNIT_TOP_PIB2GIF_ERROR_REG): + case MMIOEXP_SCOM2OFFSET(EXPLR_TP_MB_UNIT_TOP_GIF2PCB_ERROR_REG): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MFIR): + case MMIOEXP_SCOM2OFFSET(EXPLR_MMIO_MFIRWOF): + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: recursive loop detected:" + " OCMB[0x%08x] offset[0x%016llx]", + TARGETING::get_huid(i_expTarget), i_offset); + /*@ + * @errortype + * @moduleid MMIO::MOD_DETERMINE_EXP_CALLOUTS + * @reasoncode MMIO::RC_BAD_MMIO_READ + * @userdata1 OCMB huid + * @userdata2 Address offset + * @devdesc OCMB MMIO read failed + * @custdesc Unexpected memory subsystem firmware + * error. 
+ */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + MMIO::MOD_DETERMINE_EXP_CALLOUTS, + MMIO::RC_BAD_MMIO_READ, + TARGETING::get_huid(i_expTarget), + i_offset, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT); + break; + + default: + break; + } + if(l_err) + { + break; + } + } + + // Check if this is an access to config space + if(i_offset < mss::exp::ib::EXPLR_IB_MMIO_OFFSET) + { + mcfgerrReg_t l_reg; + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: getting callouts for failed config" + " space transaction on OCMB[0x%08x]", get_huid(i_expTarget)); + + // Read the Explorer MCFGERR register + // NOTE: This register is not clearable + l_reqSize = sizeof(l_reg.word64); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_reg.word64, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERR)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "determineExpCallouts: getscom(MCFGERR) failed" + " on OCMB[0x%08x]", get_huid(i_expTarget)); + break; + } + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: MCFGERR: 0x%016llx on" + " OCMB[0x%08x]", l_reg.word64, get_huid(i_expTarget)); + + // Extract the OCAPI response code from the register + switch(l_reg.resp_code) + { + // Firmware Errors + case OCAPI_UNSUPPORTED_OP_LENGTH: + case OCAPI_BAD_ADDRESS: + l_fwFailure = true; + break; + + // This one could be caused by a bad address (FW) if there is + // a device/function mismatch. Otherwise, it's bad HW. 
+ case OCAPI_FAILED: + if(l_reg.dev_func_mismatch) + { + l_fwFailure = true; + break; + } + break; + + // Everything else is HW failure + default: + break; + } + + // Dump some regs specific to config failures + l_regDump.addDataBuffer(&l_reg.word64, sizeof(l_reg.word64), + DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERR)); + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MCFGERRA)); + break; + } + + // We were accessing a SCOM reg, MSCC reg, or SRAM + + pib2gifErrorReg_t l_pib2gif; + gif2pcbErrorReg_t l_gif2pcb; + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: getting callouts for failed MMIO space" + " transaction on OCMB[0x%08x]", get_huid(i_expTarget)); + + // Read the PIB2GIF error reg + // NOTE: This register is ONLY accessible through MMIO path, not I2C. + l_reqSize = sizeof(l_pib2gif.word64); + l_err = DeviceFW::deviceRead( + i_expTarget, + &l_pib2gif.word64, + l_reqSize, + DEVICE_SCOM_ADDRESS(EXPLR_TP_MB_UNIT_TOP_PIB2GIF_ERROR_REG)); + if(l_err) + { + TRACFCOMP(g_trac_mmio, ERR_MRK + "determineExpCallouts: getscom(PIB2GIF_ERROR_REG) failed" + " on OCMB[0x%08x]", get_huid(i_expTarget)); + break; + } + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: PIB2GIF_ERROR_REG: 0x%016llx" + " on OCMB[0x%08x]", l_pib2gif.word64, get_huid(i_expTarget)); + + // The pib2gif error register contains a copy of the gif2pcb error reg. + // No need to read it again, just copy it into our struct. 
+ l_gif2pcb.word64 = 0; + l_gif2pcb.used_bits = l_pib2gif.gif2pcb_error; + + TRACFCOMP(g_trac_mmio, + "determineExpCallouts: GIF2PCB_ERROR_REG: 0x%016llx" + " on OCMB[0x%08x]", l_gif2pcb.word64, get_huid(i_expTarget)); + + // Check for software errors + if((l_pib2gif.invalid_address_error) || + (l_gif2pcb.invalid_access) || + (l_gif2pcb.pcb_err_code == PCB_INVALID_ADDRESS)) + { + l_fwFailure = true; + } + + // dump some regs specific to MMIO failures + l_regDump.addDataBuffer(&l_pib2gif.word64, sizeof(l_pib2gif.word64), + DEVICE_SCOM_ADDRESS(EXPLR_TP_MB_UNIT_TOP_PIB2GIF_ERROR_REG)); + l_regDump.addData( + DEVICE_SCOM_ADDRESS(EXPLR_TP_MB_UNIT_TOP_GIF2PCB_ERROR_REG)); + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MMIOERR)); + break; + }while(0); + + if(!l_err) + { + // Dump some registers common to both types of transaction types + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MFIR)); + l_regDump.addData(DEVICE_SCOM_ADDRESS(EXPLR_MMIO_MFIRWOF)); + + // Add our register dump to the error log. + l_regDump.addToLog(i_err); + } + + // Notify caller of HW or FW failure + o_fwFailure = l_fwFailure; + return l_err; +} + +}; // End MMIOEXP namespace diff --git a/src/usr/mmio/mmio_explorer.H b/src/usr/mmio/mmio_explorer.H new file mode 100644 index 000000000..a7b6b1d04 --- /dev/null +++ b/src/usr/mmio/mmio_explorer.H @@ -0,0 +1,90 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/mmio/mmio_explorer.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +#ifndef __MMIO_EXPLORER_H +#define __MMIO_EXPLORER_H + +/** @file mmio_explorer.H + * @brief Provides interface to perform Explorer MMIO operations + */ + +#include <errl/errlentry.H> +#include <targeting/common/target.H> + +namespace MMIOEXP +{ + +/** + * + * @brief Determine if the OCMB detected a failure on a specific MMIO + * transaction to the specified OCMB target. + * + * @param[in] i_expTarget Handle for the target OCMB chip. + * @param[in] i_va Virtual address of the transaction to check + * @param[in] i_accessLimit The byte range of the transaction + * @param[in] i_offset The offset of the config reg, scom reg, MSCC reg or + * SRAM to be accessed on the explorer chip. + * @param[in] i_opType The operation type (read or write) + * @param[out] o_errorAddressMatches Set to true if the OCMB chip detected a + * failure on our transaction. + * @param[out] o_errorAddressIsZero Set to true if no error has been detected + * yet. + * @return nullptr on successful read of OCMB error status, non-null otherwise. + * + */ +errlHndl_t checkExpError(const TARGETING::TargetHandle_t i_expTarget, + const uint64_t i_va, + const uint64_t i_accessLimit, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + bool& o_errorAddressMatches, + bool& o_errorAddressIsZero); + +/** + * + * @brief Collect additional failure data from the target explorer chip and add + * appropriate FRU/Procedure callouts.
+ * + * @note Must call checkExpError to determine that a transaction failed before + * calling this function. + * + * @param[in] i_expTarget Handle of explorer to collect extra FFDC from + * @param[in] i_offset The offset of the config reg, scom reg, MSCC reg or + * SRAM that was accessed on the explorer chip. + * @param[in] i_opType The operation type (read or write) + * @param[in] i_err The error log for adding additional FFDC + * @param[out] o_fwFailure The failure was a firmware failure if true, + * otherwise, it was a hardware failure. + * + * @return non-nullptr if unable to determine failure type, nullptr otherwise. + */ +errlHndl_t determineExpCallouts(const TARGETING::TargetHandle_t i_expTarget, + const uint64_t i_offset, + DeviceFW::OperationType i_opType, + errlHndl_t i_err, + bool& o_fwFailure); + +}; // End MMIOEXP namespace + +#endif diff --git a/src/usr/mmio/test/makefile b/src/usr/mmio/test/makefile index 133f3ca44..bb5ffe21d 100644 --- a/src/usr/mmio/test/makefile +++ b/src/usr/mmio/test/makefile @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2011,2018 +# Contributors Listed Below - COPYRIGHT 2011,2019 # [+] International Business Machines Corp. # # @@ -25,6 +25,10 @@ ROOTPATH = ../../../.. MODULE = testmmio + +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/ocmb/explorer/common/include/ + + TESTS = *.H diff --git a/src/usr/mmio/test/mmiotest.H b/src/usr/mmio/test/mmiotest.H index f7abd7816..034e2f052 100644 --- a/src/usr/mmio/test/mmiotest.H +++ b/src/usr/mmio/test/mmiotest.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2018 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -27,9 +27,13 @@ #include <errl/errlentry.H> #include <limits.h> #include <devicefw/driverif.H> -#include <mmio/mmio.H> +#include "../mmio.H" +#include <targeting/common/utilFilter.H> +#include <explorer_scom_addresses.H> +#include <exp_oc_regs.H> -extern trace_desc_t* g_trac_mmio; +static const uint64_t EXPLR_IB_CONFIG_OFFSET = 0x0000000000000000ull; +static const uint64_t EXPLR_IB_MMIO_OFFSET = 0x0000000100000000ull; // 4GB class MmioTest : public CxxTest::TestSuite { @@ -40,70 +44,66 @@ class MmioTest : public CxxTest::TestSuite */ void test_Valid(void) { - TRACFCOMP( g_trac_mmio, "MmioTest::test_Valid> Start" ); + TS_TRACE("MmioTest::test_Valid> Start" ); uint64_t fails = 0; uint64_t total = 0; errlHndl_t l_err = nullptr; - uint64_t regdata = 0; - size_t op_size = sizeof(uint64_t); + uint32_t regdata4 = 0; + size_t op_size = 0; -// TODO RTC 202533 - enable this test once the Axone model is IPLing -// successfully in Simics. -#if 0 // Get OCMB target, return if there is no OCMB TARGETING::TargetHandle_t ocmb_target = nullptr; TARGETING::TargetHandleList ocmb_target_list; - getAllChips(ocmb_target_list, TARGETING::TYPE_OCMB_CHIP); + TARGETING::getAllChips(ocmb_target_list, TARGETING::TYPE_OCMB_CHIP); if (ocmb_target_list.size() == 0) { - TRACFCOMP(g_trac_fsiscom, "MmioTest::test_Valid> Target is NULL"); + TS_TRACE("MmioTest::test_Valid> Target is NULL"); TS_INFO("MmioTest::test_Valid> Target is NULL"); return; } ocmb_target = ocmb_target_list[0]; - // read + // valid read from config space register ++total; - l_err = MMIO::mmioPerformOp( - DeviceFW::READ, + op_size = sizeof(regdata4); + l_err = DeviceFW::deviceRead( ocmb_target, - &regdata, + &regdata4, op_size, - 0x0, - op_size); + DEVICE_MMIO_ADDRESS((EXPLR_IB_CONFIG_OFFSET | + EXPLR_OC_O0MBIT_O0DID_LSB), + op_size)); if(l_err != nullptr) { - TRACFCOMP(g_trac_mmio, - "MmioTest::test_Valid> Error for read, RC=0x%04X", + TS_TRACE("MmioTest::test_Valid> Error for read, RC=0x%04X", 
ERRL_GETRC_SAFE(l_err)); TS_FAIL("MmioTest::test_Valid> Error for read, RC=0x%04X", ERRL_GETRC_SAFE(l_err)); ++fails; - errlCommit(l_err, MMIO_COMP_ID); + errlCommit(l_err, CXXTEST_COMP_ID); } - // write + // valid write to config space register ++total; - l_err = MMIO::mmioPerformOp( - DeviceFW::WRITE, + op_size = sizeof(regdata4); + l_err = DeviceFW::deviceWrite( ocmb_target, - &regdata, + &regdata4, op_size, - 0x08, - op_size); + DEVICE_MMIO_ADDRESS((EXPLR_IB_CONFIG_OFFSET | + EXPLR_OC_O0CCD_LSB), + op_size)); if(l_err != nullptr) { - TRACFCOMP(g_trac_mmio, - "MmioTest::test_Valid> Error for write, RC=0x%04X", - ERRL_GETRC_SAFE(l_err)); + TS_TRACE("MmioTest::test_Valid> Error for write, RC=0x%04X", + ERRL_GETRC_SAFE(l_err)); TS_FAIL("MmioTest::test_Valid> Error for write, RC=0x%04X", ERRL_GETRC_SAFE(l_err)); ++fails; - errlCommit(l_err, MMIO_COMP_ID); + errlCommit(l_err, CXXTEST_COMP_ID); } -#endif - TRACFCOMP(g_trac_mmio, "Mmio::test_Valid> %d/%d fails", fails, total); + TS_TRACE("Mmio::test_Valid> %d/%d fails", fails, total); }; }; |