diff options
Diffstat (limited to 'src/usr')
-rw-r--r-- | src/usr/hwas/hostbootIstep.C | 6 | ||||
-rw-r--r-- | src/usr/hwpf/hwp/dmi_training/dmi_training.C | 45 | ||||
-rwxr-xr-x | src/usr/i2c/i2c.C | 2 | ||||
-rw-r--r-- | src/usr/ibscom/ibscom.C | 475 | ||||
-rw-r--r-- | src/usr/ibscom/ibscom.H | 18 | ||||
-rw-r--r-- | src/usr/ibscom/test/ibscomtest.H | 272 | ||||
-rw-r--r-- | src/usr/scom/scom.C | 25 | ||||
-rw-r--r-- | src/usr/testcore/rtloader/loader.H | 2 | ||||
-rw-r--r-- | src/usr/xscom/piberror.C | 9 |
9 files changed, 670 insertions, 184 deletions
diff --git a/src/usr/hwas/hostbootIstep.C b/src/usr/hwas/hostbootIstep.C index 8e10c5de4..adeec948b 100644 --- a/src/usr/hwas/hostbootIstep.C +++ b/src/usr/hwas/hostbootIstep.C @@ -41,6 +41,7 @@ #include <targeting/attrsync.H> #include <diag/prdf/prdfMain.H> #include <intr/interrupt.H> +#include <ibscom/ibscomif.H> namespace HWAS { @@ -231,6 +232,9 @@ void* host_prd_hwreconfig( void *io_pArgs ) errlHndl_t errl = NULL; + // Flip the scom path back to FSI in case we enabled IBSCOM previously + IBSCOM::enableInbandScoms(IBSCOM_DISABLE); + // Call PRDF to remove non-function chips from its system model errl = PRDF::refresh(); @@ -239,7 +243,7 @@ void* host_prd_hwreconfig( void *io_pArgs ) TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "host_prd_hwreconfig ERROR 0x%.8X returned from" " call to PRDF::refresh", errl->reasonCode()); - } + } TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace, "host_prd_hwreconfig exit" ); diff --git a/src/usr/hwpf/hwp/dmi_training/dmi_training.C b/src/usr/hwpf/hwp/dmi_training/dmi_training.C index cc680cb12..64c08030e 100644 --- a/src/usr/hwpf/hwp/dmi_training/dmi_training.C +++ b/src/usr/hwpf/hwp/dmi_training/dmi_training.C @@ -75,6 +75,7 @@ #include <erepairAccessorHwpFuncs.H> #include "dmi_io_dccal/dmi_io_dccal.H" #include <pbusLinkSvc.H> +#include <ibscom/ibscomif.H> namespace DMI_TRAINING { @@ -1244,49 +1245,7 @@ void* call_cen_set_inband_addr( void *io_pArgs ) } //Now enable Inband SCOM for all membuf chips. - TARGETING::TargetHandleList membufChips; - getAllChips(membufChips, TYPE_MEMBUF, true); - - TARGETING::Target * sys = NULL; - TARGETING::targetService().getTopLevelTarget(sys); - - for(uint32_t i=0; i<membufChips.size(); i++) - { - // If the membuf chip supports IBSCOM AND.. 
- // (Chip is >=DD20 OR IBSCOM Override is set) - if ((membufChips[i]->getAttr<ATTR_PRIMARY_CAPABILITIES>() - .supportsInbandScom) && - (// TODO: RTC 68984: Disable IBSCOM for now (membufChips[i]->getAttr<TARGETING::ATTR_EC>() >= 0x20) || - (sys->getAttr<TARGETING::ATTR_IBSCOM_ENABLE_OVERRIDE>() != 0)) - ) - { - ScomSwitches l_switches = - membufChips[i]->getAttr<ATTR_SCOM_SWITCHES>(); - - // If Inband Scom is not already enabled. - if ((l_switches.useInbandScom != 1) || - (l_switches.useFsiScom != 0)) - { - l_switches.useFsiScom = 0; - l_switches.useInbandScom = 1; - - // Turn off FSI scom and turn on Inband Scom. - membufChips[i]->setAttr<ATTR_SCOM_SWITCHES>(l_switches); - - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, - "Enable IBSCOM on target HUID %.8X", - TARGETING::get_huid(membufChips[i])); - } - } - else - { - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, - "IBSCOM NOT enabled on target HUID %.8X", - TARGETING::get_huid(membufChips[i])); - - } - } - + IBSCOM::enableInbandScoms(); }while(0); TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace, diff --git a/src/usr/i2c/i2c.C b/src/usr/i2c/i2c.C index 5dbbf892f..8c2656f18 100755 --- a/src/usr/i2c/i2c.C +++ b/src/usr/i2c/i2c.C @@ -1383,6 +1383,7 @@ errlHndl_t i2cSetupMasters ( void ) mode.bit_rate_div = io_args.bit_rate_divisor; + size = sizeof(uint64_t); err = deviceWrite( centList[centaur], &mode.value, size, @@ -1490,6 +1491,7 @@ errlHndl_t i2cSetupMasters ( void ) mode.bit_rate_div = io_args.bit_rate_divisor; + size = sizeof(uint64_t); err = deviceWrite( procList[proc], &mode.value, size, diff --git a/src/usr/ibscom/ibscom.C b/src/usr/ibscom/ibscom.C index 874f8f98c..abbc3f3c7 100644 --- a/src/usr/ibscom/ibscom.C +++ b/src/usr/ibscom/ibscom.C @@ -41,6 +41,9 @@ #include <limits.h> #include <errl/errludtarget.H> #include <xscom/piberror.H> +#include <diag/attn/attn.H> +#include <ibscom/ibscomif.H> +#include <targeting/common/utilFilter.H> // Easy macro replace for unit testing //#define TRACUCOMP(args...) 
TRACFCOMP(args) @@ -48,13 +51,16 @@ // Trace definition trace_desc_t* g_trac_ibscom = NULL; -TRAC_INIT(&g_trac_ibscom, "IBSCOM", KILOBYTE); +TRAC_INIT(&g_trac_ibscom, IBSCOM_COMP_NAME, KILOBYTE); using namespace ERRORLOG; using namespace TARGETING; namespace IBSCOM { +// SCOM Register addresses +const uint32_t MBS_FIR = 0x02011400; +const uint32_t MBSIBERR0 = 0x0201141B; // Register XSCcom access functions to DD framework DEVICE_REGISTER_ROUTE(DeviceFW::WILDCARD, @@ -296,6 +302,109 @@ errlHndl_t getTargetVirtualAddress(Target* i_target, return l_err; } +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +void err_cleanup(Target* i_target, + uint64_t i_addr) +{ + //Going to commit at most 1 informational error here + errlHndl_t l_err = NULL; + errlHndl_t tmp_err = NULL; + ERRORLOG::ErrlUserDetailsLogRegister l_logReg(i_target); + + uint64_t zeroData = 0x0; + size_t op_size = sizeof(uint64_t); + + // Clear our the status reg + op_size = sizeof(uint64_t); + tmp_err = deviceOp( DeviceFW::WRITE, + i_target, + &zeroData, + op_size, + DEVICE_FSISCOM_ADDRESS(MBSIBERR0) ); + if(tmp_err) + { + if( l_err ) + { + delete tmp_err; + } + else + { + l_err = tmp_err; + } + + //Really just want to save the address, so stick in some + //obvious dummy data + uint64_t dummyData = 0x00000000DEADBEEF; + l_logReg.addDataBuffer(&dummyData, sizeof(dummyData), + DEVICE_IBSCOM_ADDRESS(MBSIBERR0)); + } + + // Clear out the FIR bits we might trigger + uint64_t mbs_fir = 0; + op_size = sizeof(uint64_t); + tmp_err = deviceOp( DeviceFW::READ, + i_target, + &mbs_fir, + op_size, + DEVICE_FSISCOM_ADDRESS(MBS_FIR) ); + if(tmp_err) + { + if( l_err ) + { + delete tmp_err; + } + else + { + l_err = tmp_err; + } + + //Really just want to save the address, so stick in some + //obvious dummy data + uint64_t dummyData = 0x10000000DEADBEEF; + l_logReg.addDataBuffer(&dummyData, sizeof(dummyData), + 
DEVICE_IBSCOM_ADDRESS(MBS_FIR)); + } + + //22=MBS_FIR_MASK_REG_HOST_INBAND_READ_ERROR + //23=MBS_FIR_MASK_REG_HOST_INBAND_WRITE_ERROR + mbs_fir &= 0xFFFFFCFFFFFFFFFF; + op_size = sizeof(uint64_t); + tmp_err = deviceOp( DeviceFW::WRITE, + i_target, + &mbs_fir, + op_size, + DEVICE_FSISCOM_ADDRESS(MBS_FIR) ); + if(tmp_err) + { + if( l_err ) + { + delete tmp_err; + } + else + { + l_err = tmp_err; + } + + //Really just want to save the address, so stick in some + //obvious dummy data + uint64_t dummyData = 0x20000000DEADBEEF; + l_logReg.addDataBuffer(&dummyData, sizeof(dummyData), + DEVICE_IBSCOM_ADDRESS(MBS_FIR)); + } + + if( l_err ) + { + l_logReg.addToLog(l_err); + + //force to informational so we don't log extra errors + //inside of possible error collection paths + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + errlCommit(l_err,IBSCOM_COMP_ID); + l_err = NULL; + } +} + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// @@ -351,6 +460,35 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType, l_mutex = i_target->getHbMutexAttr<TARGETING::ATTR_IBSCOM_MUTEX>(); mutex_lock(l_mutex); need_unlock = true; + + //Need to check if ibscom is still enabled before moving on in + //case we flipped the switch due to an error + ScomSwitches l_switches = i_target->getAttr<ATTR_SCOM_SWITCHES>(); + if( !l_switches.useInbandScom ) + { + TRACFCOMP(g_trac_ibscom, ERR_MRK"doIBScom> IBSCOM longer enabled on %.8X, error must have occurred", get_huid(i_target)); + /*@ + * @errortype + * @moduleid IBSCOM_DO_IBSCOM + * @reasoncode IBSCOM_RETRY_DUE_TO_ERROR + * @userdata1[0:31] HUID of Centaur Target + * @userdata1[32:63] SCOM Address + * @userdata2 Not Used + * @devdesc Previous error disabled ibscom, so forcing + * a retry via FSI + */ + l_err = + new ErrlEntry(ERRL_SEV_UNRECOVERABLE, + IBSCOM_DO_IBSCOM, + IBSCOM_RETRY_DUE_TO_ERROR, + TWO_UINT32_TO_UINT64(get_huid(i_target), + i_addr)); + //This error 
should NEVER get returned to caller, so it's a + //FW bug if it actually gets comitted. + l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_HIGH); + break; + } } if (i_opType == DeviceFW::READ) @@ -391,6 +529,8 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType, //FW bug if it actually gets comitted. l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_HIGH); + ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target") + .addToLog(l_err); break; } else @@ -416,20 +556,19 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType, l_virtAddr[i_addr] = l_data; eieio(); + //Workaround for HW264203 + //A read of MBSIBWRSTAT will not trigger a SUE so we need to + //read the MBS_FIR instead. TRACDCOMP(g_trac_ibscom, - "doIBScom: Read MBSIBWRSTAT to check for error"); - //Read MBSIBWRSTAT to check for errors - //If an error occured on last write, reading MBSIBWRSTAT will - //trigger a SUE. - const uint32_t MBSIBWRSTAT = 0x201141D; - uint64_t statData = 0; + "doIBScom: Read MBS_FIR to check for error"); + uint64_t fir_data = 0; size_t readSize = sizeof(uint64_t); l_err = doIBScom(DeviceFW::READ, - i_target, - &statData, - readSize, - MBSIBWRSTAT, - true); + i_target, + &fir_data, + readSize, + MBS_FIR, + true); if(l_err != NULL) { if( IBSCOM_SUE_IN_ERR_PATH == l_err->reasonCode() ) @@ -446,103 +585,122 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType, break; } } + else + { + TRACUCOMP(g_trac_ibscom, "doIBScom: MBS_FIR=%.16X",fir_data); + //check the FIR bits specifically + //23 = MBS_FIR_MASK_REG_HOST_INBAND_WRITE_ERROR: A PIB error + // or inband buffer error was detected on a host inband + // write operation. 
+ if( fir_data & 0x0000010000000000 ) + { + TRACFCOMP(g_trac_ibscom, ERR_MRK" doIBScom: MBS_FIR[23] detected after write : %.16X", fir_data); + rw_error = true; + } + } } + // Common error checking for both read and write if(rw_error) { bool busDown = false; TRACUCOMP(g_trac_ibscom, "doIBScom: Get Error data, read MBSIBERR0"); - const uint32_t MBSIBERR0 = 0x201141B; - const uint64_t HOST_ERROR_VALID = 0x0000000080000000; - const uint64_t PIB_ERROR_STATUS_MASK = 0x0000000070000000; - const uint64_t PIB_ERROR_SHIFT = 28; - size_t readSize = sizeof(uint64_t); - uint64_t mbsiberr0_data = 0; + size_t op_size = sizeof(uint64_t); + + // Note: Using FSISCOM path to read the errors even though + // we could use IBSCOM in DD2 because it makes code simpler + + MBSIBERRO_Reg_t mbsiberr0; + op_size = sizeof(uint64_t); + l_err = deviceOp( DeviceFW::READ, + i_target, + &(mbsiberr0.data), + op_size, + DEVICE_FSISCOM_ADDRESS(MBSIBERR0) ); + if(l_err) + { + TRACFCOMP(g_trac_ibscom, ERR_MRK + "doIBScom: Error reading MBSIBERR0 over FSI"); + //Save away the IBSCOM address + ERRORLOG::ErrlUserDetailsLogRegister l_logReg(i_target); + //Really just want to save the address, so stick in some + //obvious dummy data + uint64_t dummyData = 0x30000000DEADBEEF; + l_logReg.addDataBuffer(&dummyData, sizeof(dummyData), + DEVICE_IBSCOM_ADDRESS(i_addr)); + l_logReg.addToLog(l_err); + + //force to informational so we don't log extra errors + //inside of possible error collection paths + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + errlCommit(l_err,IBSCOM_COMP_ID); + l_err = NULL; + + //fabricate some error data + mbsiberr0.addr = i_addr; + mbsiberr0.errvalid = 1; + mbsiberr0.piberr = 0; + mbsiberr0.iswrite = (i_opType == DeviceFW::READ) ? 
0 : 1; + mbsiberr0.reserved = 0xBADBAD; + } + + TRACUCOMP(g_trac_ibscom, + "doIBScom: MBSIBERR0(0x%.16x) = 0x%.16X", + MBSIBERR0, mbsiberr0.data); - //Use FSISCOM as workaround for DD1.x centaur chips (HW246298) - if(i_target->getAttr<TARGETING::ATTR_EC>() < 0x20) + //if the MBSIBERR0Q_IB_HOST_ERROR_VALID bit is not set + // then we have a bus failure + if( !(mbsiberr0.errvalid) ) { - //Need to explicitly use FSI SCOM in DD1X chips - l_err = deviceOp( DeviceFW::READ, - i_target, - &mbsiberr0_data, - readSize, - DEVICE_FSISCOM_ADDRESS(MBSIBERR0) ); - if(l_err) - { - TRACFCOMP(g_trac_ibscom, ERR_MRK - "doIBScom: Error reading MBSIBERR0 over FSI"); - //Save away the IBSCOM address - ERRORLOG::ErrlUserDetailsLogRegister - l_logReg(i_target); - //Really just want to save the addres, so stick in some - //obvious dummy data - uint64_t dummyData = 0x00000000DEADBEEF; - l_logReg.addDataBuffer(&dummyData, sizeof(dummyData), - DEVICE_IBSCOM_ADDRESS(i_addr)); - l_logReg.addToLog(l_err); - break; - } - TRACUCOMP(g_trac_ibscom, - "doIBScom: MBSIBERR0(0x%.16x) = 0x%.16X", - MBSIBERR0, mbsiberr0_data); + //Bus is down + busDown = true; + } + //confirm that we are looking at error data for the scom we did + //0:31 = MBSIBERR0Q_IB_HOST_ADDRESS: This is the 32 bit scom + // address that was being accessed when the error was detected. 
+ else if( mbsiberr0.addr != i_addr ) + { + TRACFCOMP( g_trac_ibscom, "doIBScom> The address in MBSIBERR0 (0x%.8X) doesn't match what we were scomming (0x%.8X)", mbsiberr0.addr, i_addr ); + /*@ + * @errortype + * @moduleid IBSCOM_DO_IBSCOM + * @reasoncode IBSCOM_WRONG_ERROR + * @userdata1[0:31] HUID of Centaur Target + * @userdata1[32:64] SCOM Address + * @userdata2 Contents of MBSIBERR0 register + * @devdesc Detected error doesn't match the address + * we failed on + */ + l_err = new ErrlEntry(ERRL_SEV_UNRECOVERABLE, + IBSCOM_DO_IBSCOM, + IBSCOM_WRONG_ERROR, + TWO_UINT32_TO_UINT64( + get_huid(i_target), + i_addr), + mbsiberr0.data); + // this would be a code bug because we got out of sync somehow + l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_HIGH ); + ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target") + .addToLog(l_err); + ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target); + ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR)); + ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0)); + ffdc.addToLog(l_err); + l_err->collectTrace(IBSCOM_COMP_NAME); //attempt to clear the error register so future accesses //will work - uint64_t zeroData = 0x0; - readSize = sizeof(uint64_t); - l_err = deviceOp( DeviceFW::WRITE, - i_target, - &zeroData, - readSize, - DEVICE_FSISCOM_ADDRESS(MBSIBERR0) ); - if(l_err ) - { - errlCommit(l_err,IBSCOM_COMP_ID); - l_err = NULL; - } + err_cleanup(i_target,i_addr); - //if the MBSIBERR0Q_IB_HOST_ERROR_VALID bit is not set - // then we have a bus failure - if( !(mbsiberr0_data & HOST_ERROR_VALID) ) - { - //Bus is down - busDown = true; - } + break; } - else // >= DD20 - { - //TODO RTC: 68984: Validate error path on DD2.0 Centaurs - l_err = doIBScom(DeviceFW::READ, - i_target, - &mbsiberr0_data, - readSize, - MBSIBERR0, - true); - if(l_err != NULL) - { - if( IBSCOM_SUE_IN_ERR_PATH == l_err->reasonCode() ) - { - TRACFCOMP(g_trac_ibscom, ERR_MRK - "doIBScom: SUE on write detected"); - delete l_err; - l_err = NULL; - 
busDown = true; - } - else - { - TRACFCOMP(g_trac_ibscom, ERR_MRK"doIBScom: Unexpected error when checking for SUE"); - break; - } - } - } // >= DD20 + if(busDown) { - //TODO RTC: 69115 - call PRD to do FIR analysis, return PRD - //error instead. /*@ * @errortype * @moduleid IBSCOM_DO_IBSCOM @@ -553,19 +711,25 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType, * @devdesc Bus failure when attempting to perform * IBSCOM operation. IBSCOM disabled. */ - l_err = + errlHndl_t ib_err = new ErrlEntry(ERRL_SEV_UNRECOVERABLE, IBSCOM_DO_IBSCOM, IBSCOM_BUS_FAILURE, TWO_UINT32_TO_UINT64( get_huid(i_target), i_addr), - mbsiberr0_data); + mbsiberr0.data); + + ib_err->addHwCallout(i_target, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); - l_err->addHwCallout(i_target, - HWAS::SRCI_PRIORITY_HIGH, - HWAS::NO_DECONFIG, - HWAS::GARD_NULL); + //grab some HW regs via FSISCOM + ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target); + ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR)); + ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0)); + ffdc.addToLog(ib_err); //disable IBSCOM ScomSwitches l_switches = @@ -581,6 +745,33 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType, // Turn off IBSCOM and turn on FSI SCOM. 
i_target->setAttr<ATTR_SCOM_SWITCHES>(l_switches); } + + //@todo: RTC:92971 + //There is a potential deadlock if we call PRD here + //Look for a better PRD error + //errlHndl_t prd_err = ATTN::checkForIplAttentions(); + errlHndl_t prd_err = NULL; + if( prd_err ) + { + TRACFCOMP( g_trac_ibscom, ERR_MRK"Error from checkForIplAttentions : PLID=%X", prd_err->plid() ); + //connect up the plids + ib_err->plid(prd_err->plid()); + //commit my log as info because PRD's log is better + ib_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + errlCommit(ib_err,IBSCOM_COMP_ID); + l_err = prd_err; + } + else + { + //my log is the only one + l_err = ib_err; + } + + l_err->collectTrace(IBSCOM_COMP_NAME); + + //Note-not cleaning up the error status here since + // we will not be using IBSCOM again + break; } else // bus isn't down, some other kind of error @@ -597,23 +788,33 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType, */ l_err = new ErrlEntry(ERRL_SEV_UNRECOVERABLE, IBSCOM_DO_IBSCOM, - IBSCOM_BUS_FAILURE, + IBSCOM_PIB_FAILURE, TWO_UINT32_TO_UINT64( get_huid(i_target), i_addr), - mbsiberr0_data); + mbsiberr0.data); //Add this target to the FFDC - ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_err); - - uint64_t pib_code = - (mbsiberr0_data & PIB_ERROR_STATUS_MASK) >> PIB_ERROR_SHIFT; + ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target") + .addToLog(l_err); //add callouts based on the PIB error PIB::addFruCallouts( i_target, - pib_code, + mbsiberr0.piberr, l_err ); + //grab some HW regs via FSISCOM + ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target); + ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR)); + ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0)); + ffdc.addToLog(l_err); + + l_err->collectTrace(IBSCOM_COMP_NAME); + + //attempt to clear the error register so future accesses + //will work + err_cleanup(i_target,i_addr); + break; } } @@ -659,4 +860,70 @@ errlHndl_t ibscomPerformOp(DeviceFW::OperationType i_opType, return l_err; } + +/** + * @brief Enable or 
disable Inband SCOMs on all capable chips + */ +void enableInbandScoms( bool i_disable ) +{ + TARGETING::TargetHandleList membufChips; + TARGETING::getAllChips(membufChips, TYPE_MEMBUF, true); + + mutex_t* l_mutex = NULL; + + TARGETING::Target * sys = NULL; + TARGETING::targetService().getTopLevelTarget(sys); + + uint8_t l_override = + sys->getAttr<TARGETING::ATTR_IBSCOM_ENABLE_OVERRIDE>(); + TRACFCOMP(g_trac_ibscom,"IBSCOM_ENABLE_OVERRIDE=%d",l_override); + + for(uint32_t i=0; i<membufChips.size(); i++) + { + TARGETING::Target* mb = membufChips[i]; + + // If the membuf chip supports IBSCOM AND.. + // (Chip is >=DD20 OR IBSCOM Override is set) + if( (mb->getAttr<ATTR_PRIMARY_CAPABILITIES>().supportsInbandScom) + && + ( (mb->getAttr<TARGETING::ATTR_EC>() >= 0x20) || + (l_override != 0) ) + ) + { + //don't mess with attributes without the mutex (just to be safe) + l_mutex = mb->getHbMutexAttr<TARGETING::ATTR_IBSCOM_MUTEX>(); + mutex_lock(l_mutex); + + ScomSwitches l_switches = mb->getAttr<ATTR_SCOM_SWITCHES>(); + + uint8_t ib_new = 1; + uint8_t fsi_new = 0; + if( i_disable == IBSCOM_DISABLE ) + { + ib_new = 0; + fsi_new = 1; + } + + // If Inband Scom enablement changed + if ((l_switches.useInbandScom != ib_new) || + (l_switches.useFsiScom != fsi_new)) + { + l_switches.useFsiScom = fsi_new; + l_switches.useInbandScom = ib_new; + + // Modify attribute + membufChips[i]->setAttr<ATTR_SCOM_SWITCHES>(l_switches); + + TRACFCOMP(g_trac_ibscom, + "IBSCOM=%d on target HUID %.8X", + ib_new, + TARGETING::get_huid(mb)); + } + + mutex_unlock(l_mutex); + } + } +} + + } // end namespace diff --git a/src/usr/ibscom/ibscom.H b/src/usr/ibscom/ibscom.H index 3ba3c34f1..f1742b3e5 100644 --- a/src/usr/ibscom/ibscom.H +++ b/src/usr/ibscom/ibscom.H @@ -67,6 +67,24 @@ errlHndl_t ibscomPerformOp(DeviceFW::OperationType i_opType, int64_t i_accessType, va_list i_args); +/** + * Bit definition for MBSIBERR0 + */ +struct MBSIBERRO_Reg_t +{ + union + { + uint64_t data; + struct + { + uint64_t 
addr:32; //MBSIBERR0Q_IB_HOST_ADDRESS + uint64_t errvalid:1; //MBSIBERR0Q_IB_HOST_ERROR_VALID + uint64_t piberr:3; //MBSIBERR0Q_IB_HOST_ERROR_STATUS + uint64_t iswrite:1; //MBSIBERR0Q_IB_HOST_WRITE_NOT_READ + uint64_t reserved:27; + }; + }; +}; }; diff --git a/src/usr/ibscom/test/ibscomtest.H b/src/usr/ibscom/test/ibscomtest.H index 44e2d7e66..0e623c771 100644 --- a/src/usr/ibscom/test/ibscomtest.H +++ b/src/usr/ibscom/test/ibscomtest.H @@ -35,6 +35,7 @@ #include <devicefw/userif.H> #include <ibscom/ibscomreasoncodes.H> #include <devicefw/driverif.H> +#include <sys/time.h> extern trace_desc_t* g_trac_ibscom; @@ -86,26 +87,36 @@ class IBscomTest: public CxxTest::TestSuite return; } l_testTarget = *(centaur_list.begin()); + TRACFCOMP(g_trac_ibscom,"test_IBscom> Using target %.8X", TARGETING::get_huid(l_testTarget)); + + ScomSwitches l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>(); + if( !l_switches.useInbandScom ) + { + TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom because ibscom is not enabled"); + return; + } TRACDCOMP(g_trac_ibscom, - "IBscomTest::test_IBscom> Read orignal data from Centaur"); + "IBscomTest::test_IBscom> Read original data from Centaur"); - const uint64_t addrs[] = {0x0201164F, 0x0301069A}; + const uint64_t addrs[] = {0x02010803/*0:26*/, 0x03010403/*0:21*/}; uint64_t orig_data[2] = {0}; - uint64_t new_data[] = {0x1234567ABABABA00, 0xFEEDB0B0FEDCBA00}; + uint64_t new_data1[] = {0x123456E000000000, 0xFEEDB00000000000}; uint64_t read_data[2] = {0}; uint64_t read_data_fsi[2] = {0}; size_t op_size = sizeof(uint64_t); + const uint64_t junk = 0x1122334455667788; - //Save of initial register content + //Save off initial register content via FSI for(uint32_t i=0; i<2; i++) { op_size = sizeof(uint64_t); - l_err = deviceRead( l_testTarget, - &orig_data[i], - op_size, - DEVICE_SCOM_ADDRESS(addrs[i]) ); + l_err = deviceOp( DeviceFW::READ, + l_testTarget, + &orig_data[i], + op_size, + DEVICE_FSISCOM_ADDRESS(addrs[i]) ); if( l_err ) { 
TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Orig Read: Error from device : addr=0x%X, RC=%X", @@ -116,17 +127,54 @@ class IBscomTest: public CxxTest::TestSuite } total++; + // OR in the original data so we don't clear mask bits + new_data1[i] |= orig_data[i]; + } + + //Read the data with IBSCOM + for(uint32_t i=0; i<2; i++) + { + TRACDCOMP(g_trac_ibscom, + "IBscomTest::test_IBscom> Read data"); + + //reset size + op_size = sizeof(uint64_t); + read_data[i] = junk; + l_err = deviceOp( DeviceFW::READ, + l_testTarget, + &read_data[i], + op_size, + DEVICE_IBSCOM_ADDRESS(addrs[i]) ); + if( l_err ) + { + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X", + addrs[i], l_err->reasonCode() ); + TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" ); + fails++; + errlCommit(l_err,IBSCOM_COMP_ID); + } + + if(orig_data[i] != read_data[i]) + { + TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read check." ); + + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X", + addrs[i], new_data1[i], read_data[i]); + fails++; + } + + total++; } - //Write in some new data + //Write in some new data with IBSCOM for(uint32_t i=0; i<2; i++) { - TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data[i]); + TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data1[i]); op_size = sizeof(uint64_t); l_err = deviceOp( DeviceFW::WRITE, l_testTarget, - &new_data[i], + &new_data1[i], op_size, DEVICE_IBSCOM_ADDRESS(addrs[i]) ); if( l_err ) @@ -137,11 +185,12 @@ class IBscomTest: public CxxTest::TestSuite fails++; errlCommit(l_err,IBSCOM_COMP_ID); } + nanosleep( 0, 1000000 ); //sleep for 1ms total++; } - //Read the data back with IBSCOM + //Read the data back with FSISCOM for(uint32_t i=0; i<2; i++) { TRACDCOMP(g_trac_ibscom, @@ -151,31 +200,56 @@ class IBscomTest: public CxxTest::TestSuite op_size = 
sizeof(uint64_t); l_err = deviceOp( DeviceFW::READ, l_testTarget, - &read_data[i], + &read_data_fsi[i], op_size, - DEVICE_IBSCOM_ADDRESS(addrs[i]) ); + DEVICE_FSISCOM_ADDRESS(addrs[i]) ); if( l_err ) { - TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X", + TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X", addrs[i], l_err->reasonCode() ); - TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" ); + TS_FAIL( "test_IBscom> ERROR : Error log from FSI Read" ); fails++; errlCommit(l_err,IBSCOM_COMP_ID); } - if(new_data[i] != read_data[i]) + if(new_data1[i] != read_data_fsi[i]) { - TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read-back check." ); + TS_FAIL( "test_IBscom> ERROR : Data miss-match on FSI read-back check." ); - TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X", - addrs[i], new_data[i], read_data[i]); + TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, FSI read data=0x%.16X", + addrs[i], new_data1[i], read_data_fsi[i]); + fails++; + } + total++; + } + + //Write in some new data with FSISCOM + uint64_t new_data2[] = {0xA5A5A50000000000/*0:26*/, + 0x1122000000000000/*0:21*/}; + for(uint32_t i=0; i<2; i++) + { + TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data2[i]); + + op_size = sizeof(uint64_t); + l_err = deviceOp( DeviceFW::WRITE, + l_testTarget, + &new_data2[i], + op_size, + DEVICE_FSISCOM_ADDRESS(addrs[i]) ); + if( l_err ) + { + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write: Error from device : addr=0x%X, RC=%X", + addrs[i], l_err->reasonCode() ); + TS_FAIL( "ScomTest::test_IBscom> ERROR : Error log from FSI Write" ); fails++; + errlCommit(l_err,IBSCOM_COMP_ID); } total++; } - //Read the data back with FSISCOM + + //Read the data back with IBSCOM for(uint32_t i=0; i<2; i++) { TRACDCOMP(g_trac_ibscom, 
@@ -185,26 +259,27 @@ class IBscomTest: public CxxTest::TestSuite op_size = sizeof(uint64_t); l_err = deviceOp( DeviceFW::READ, l_testTarget, - &read_data_fsi[i], + &read_data[i], op_size, - DEVICE_FSISCOM_ADDRESS(addrs[i]) ); + DEVICE_IBSCOM_ADDRESS(addrs[i]) ); if( l_err ) { - TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X", + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X", addrs[i], l_err->reasonCode() ); - TS_FAIL( "test_IBscom> ERROR : Error log from FSI Read" ); + TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" ); fails++; errlCommit(l_err,IBSCOM_COMP_ID); } - if(new_data[i] != read_data_fsi[i]) + if(new_data2[i] != read_data[i]) { - TS_FAIL( "test_IBscom> ERROR : Data miss-match on FSI read-back check." ); + TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read-back check." ); - TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, FSI read data=0x%.16X", - addrs[i], new_data[i], read_data_fsi[i]); + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X", + addrs[i], new_data2[i], read_data[i]); fails++; } + total++; } @@ -237,7 +312,146 @@ class IBscomTest: public CxxTest::TestSuite //TODO RTC: 72594: Add error path test cases when simics support //is available + void test_IBscom_error(void) + { + TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom_error because Simics is broken"); + return; + + uint64_t fails = 0; + uint64_t total = 0; + errlHndl_t l_err = NULL; + + TARGETING::Target* l_testTarget = NULL; + + // Target: Find a Centaur on the Master processor + TARGETING::Target* l_procTarget = NULL; + TARGETING::targetService().masterProcChipTargetHandle(l_procTarget); + assert(l_procTarget != NULL); + + TARGETING::PredicateCTM l_cent(TARGETING::CLASS_CHIP, + TARGETING::TYPE_MEMBUF, + TARGETING::MODEL_NA); + TARGETING::PredicatePostfixExpr cent_query; + 
cent_query.push(&l_cent); + + + TARGETING::TargetHandleList centaur_list; + TARGETING::targetService(). + getAssociated(centaur_list, + l_procTarget, + TARGETING::TargetService::CHILD_BY_AFFINITY, + TARGETING::TargetService::ALL, + &cent_query); + + if( centaur_list.size() < 1 ) + { + TS_FAIL( "test_IBscom_error> ERROR : Unable to find a Centaur chip" ); + return; + } + l_testTarget = *(centaur_list.begin()); + + ScomSwitches l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>(); + if( !l_switches.useInbandScom ) + { + TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom_error because ibscom is not enabled"); + return; + } + + + TRACDCOMP(g_trac_ibscom, "IBscomTest::test_IBscom_error> Read orignal data from Centaur"); + + + uint64_t new_data = 0x1234567ABABABA00; + uint64_t read_data = 0; + size_t op_size = sizeof(uint64_t); + + //Write a bad address + uint64_t bad_addr = 0x02123456; + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Write bad address 0x%.16X", bad_addr); + op_size = sizeof(uint64_t); + l_err = deviceOp( DeviceFW::WRITE, + l_testTarget, + &new_data, + op_size, + DEVICE_IBSCOM_ADDRESS(bad_addr) ); + if( !l_err ) + { + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> No error on bad address write" ); + TS_FAIL( "ScomTest::test_IBscom_error> No error on bad address write" ); + fails++; + } + else + { + delete l_err; + } + total++; + nanosleep( 0, 1000000 ); //sleep for 1ms + + //Verify ibscom is still enabled, i.e. 
we didn't think it was a bus fail + l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>(); + total++; + if( !l_switches.useInbandScom ) + { + TS_FAIL( "ScomTest::test_IBscom_error> IBSCOM was wrongly disabled after bad address write" ); + fails++; + } + + //Read a bad address + bad_addr = 0x02876543; + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Write bad address 0x%.16X", bad_addr); + op_size = sizeof(uint64_t); + l_err = deviceOp( DeviceFW::READ, + l_testTarget, + &read_data, + op_size, + DEVICE_IBSCOM_ADDRESS(bad_addr) ); + if( !l_err ) + { + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> No error on bad address read" ); + TS_FAIL( "ScomTest::test_IBscom_error> No error on bad address read" ); + fails++; + } + else + { + delete l_err; + } + total++; + + //Verify ibscom is still enabled, i.e. we didn't think it was a bus fail + l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>(); + total++; + if( !l_switches.useInbandScom ) + { + TS_FAIL( "ScomTest::test_IBscom_error> IBSCOM was wrongly disabled after bad address read" ); + fails++; + } + //Read a good address to prove things still work + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Read good address 0x%.16X", 0x02010803); + op_size = sizeof(uint64_t); + l_err = deviceOp( DeviceFW::READ, + l_testTarget, + &read_data, + op_size, + DEVICE_IBSCOM_ADDRESS(0x02010803) ); + if( l_err ) + { + TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Error on read after fail" ); + TS_FAIL( "ScomTest::test_IBscom_error> Error on read after fail" ); + errlCommit(l_err,IBSCOM_COMP_ID); + fails++; + } + total++; + + TS_TRACE("test_IBscom_error runs successfully!"); + TRACFCOMP(g_trac_ibscom, + "IBscomTest::test_IBscom_error> %d/%d fails", + fails, total ); + + //TS_FAIL("FORCING ERROR TO STOP IPL"); + + return; + } }; #endif diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C index 7b5ecaf27..b15f72406 100644 --- a/src/usr/scom/scom.C +++ b/src/usr/scom/scom.C @@ -5,7 +5,7 @@ /* */ /* 
IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2011,2012 */ +/* COPYRIGHT International Business Machines Corp. 2011,2013 */ /* */ /* p1 */ /* */ @@ -36,12 +36,13 @@ #include <errl/errlmanager.H> #include "scom.H" #include <scom/scomreasoncodes.H> +#include <ibscom/ibscomreasoncodes.H> #include <sys/time.h> // Trace definition trace_desc_t* g_trac_scom = NULL; -TRAC_INIT(&g_trac_scom, "SCOM", KILOBYTE, TRACE::BUFFER_SLOW); //1K +TRAC_INIT(&g_trac_scom, SCOM_COMP_NAME, KILOBYTE, TRACE::BUFFER_SLOW); //1K namespace SCOM @@ -102,7 +103,6 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, bool l_indScomError = false; uint64_t temp_io_buffer = 0; - //@todo - determine hwhat an appropriate timeout value enum { MAX_INDSCOM_TIMEOUT_NS = 100000 }; //=.1ms // If the indirect scom bit is 0, then doing a regular scom @@ -213,8 +213,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, break; } - //TODO tmp remove for VPO, need better polling strategy -- RTC43738 - //nanosleep( 0, 10000 ); //sleep for 10,000 ns + nanosleep( 0, 10000 ); //sleep for 10,000 ns elapsed_indScom_time_ns += 10000; }while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS); @@ -336,8 +335,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, } - //TODO tmp remove for VPO, need better polling strategy -- RTC43738 - //nanosleep( 0, 10000 ); //sleep for 10,000 ns + nanosleep( 0, 10000 ); //sleep for 10,000 ns elapsed_indScom_time_ns += 10000; }while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS); @@ -464,6 +462,19 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType, }while(0); + //Look for special retry codes + if( l_err + && (0xFFFFFFFF != i_accessType) + && (l_err->reasonCode() == IBSCOM::IBSCOM_RETRY_DUE_TO_ERROR) ) + { + delete l_err; + TRACFCOMP(g_trac_scom, "Forcing retry of Scom to %.16X on %.8X", i_addr, TARGETING::get_huid(i_target)); + // use the unused i_accessType parameter to avoid an infinite 
recursion + int64_t accessType_flag = 0xFFFFFFFF; + l_err = doScomOp( i_opType, i_target, io_buffer, + io_buflen, accessType_flag, i_addr ); + } + return l_err; } diff --git a/src/usr/testcore/rtloader/loader.H b/src/usr/testcore/rtloader/loader.H index ea36f1126..03e8e5f39 100644 --- a/src/usr/testcore/rtloader/loader.H +++ b/src/usr/testcore/rtloader/loader.H @@ -32,6 +32,7 @@ #include <errl/errlmanager.H> #include <util/utillidmgr.H> #include <map> +#include <sys/time.h> #include <runtime/interface.h> #include <vpd/vpd_if.H> @@ -105,6 +106,7 @@ class RuntimeLoaderTest : public CxxTest::TestSuite intf->malloc = malloc; intf->free = free; intf->realloc = realloc; + intf->sleep = nanosleep; intf->assert = rt_assert; intf->sendErrorLog = rt_logErr; intf->scom_read = rt_scom_read; diff --git a/src/usr/xscom/piberror.C b/src/usr/xscom/piberror.C index 366fd2579..0ffe42a2b 100644 --- a/src/usr/xscom/piberror.C +++ b/src/usr/xscom/piberror.C @@ -53,6 +53,15 @@ void addFruCallouts(TARGETING::Target* i_target, switch (i_pibErrStatus) { case PIB::PIB_CHIPLET_OFFLINE: + //Offline should just be a code bug, but it seems that there are + // cases where bad hardware can also cause this problem + io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_HIGH); + io_errl->addHwCallout( i_target, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL ); + break; case PIB::PIB_PARTIAL_GOOD: case PIB::PIB_INVALID_ADDRESS: io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, |