summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xsrc/build/tools/listdeps.pl1
-rw-r--r--src/include/runtime/interface.h2
-rw-r--r--src/include/stdlib.h2
-rw-r--r--src/include/usr/ibscom/ibscomif.H45
-rw-r--r--src/include/usr/ibscom/ibscomreasoncodes.H2
-rw-r--r--src/include/usr/xscom/piberror.H16
-rw-r--r--src/runtime/rt_stdlib.C6
-rw-r--r--src/usr/hwas/hostbootIstep.C6
-rw-r--r--src/usr/hwpf/hwp/dmi_training/dmi_training.C45
-rwxr-xr-xsrc/usr/i2c/i2c.C2
-rw-r--r--src/usr/ibscom/ibscom.C475
-rw-r--r--src/usr/ibscom/ibscom.H18
-rw-r--r--src/usr/ibscom/test/ibscomtest.H272
-rw-r--r--src/usr/scom/scom.C25
-rw-r--r--src/usr/testcore/rtloader/loader.H2
-rw-r--r--src/usr/xscom/piberror.C9
16 files changed, 735 insertions, 193 deletions
diff --git a/src/build/tools/listdeps.pl b/src/build/tools/listdeps.pl
index 2fc1df199..29087c828 100755
--- a/src/build/tools/listdeps.pl
+++ b/src/build/tools/listdeps.pl
@@ -201,6 +201,7 @@ my %resident_modules = (
"libattn.so" => '1',
"libi2c.so" => '1',
"libutil.so" => '1',
+ "libibscom.so" => '1',
);
diff --git a/src/include/runtime/interface.h b/src/include/runtime/interface.h
index 0b7382b61..dfe6a93d6 100644
--- a/src/include/runtime/interface.h
+++ b/src/include/runtime/interface.h
@@ -58,6 +58,8 @@ typedef struct hostInterfaces
void (*free)(void*);
/** realloc */
void* (*realloc)(void*, size_t);
+ /** sleep */
+ void (*sleep)(uint64_t sec, uint64_t nsec);
/** sendErrorLog
* @param[in] plid Platform Log identifier
diff --git a/src/include/stdlib.h b/src/include/stdlib.h
index 36d2c147c..c2b0615d8 100644
--- a/src/include/stdlib.h
+++ b/src/include/stdlib.h
@@ -5,7 +5,7 @@
/* */
/* IBM CONFIDENTIAL */
/* */
-/* COPYRIGHT International Business Machines Corp. 2010,2012 */
+/* COPYRIGHT International Business Machines Corp. 2010,2013 */
/* */
/* p1 */
/* */
diff --git a/src/include/usr/ibscom/ibscomif.H b/src/include/usr/ibscom/ibscomif.H
new file mode 100644
index 000000000..65304acc6
--- /dev/null
+++ b/src/include/usr/ibscom/ibscomif.H
@@ -0,0 +1,45 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/include/usr/ibscom/ibscomif.H $ */
+/* */
+/* IBM CONFIDENTIAL */
+/* */
+/* COPYRIGHT International Business Machines Corp. 2013 */
+/* */
+/* p1 */
+/* */
+/* Object Code Only (OCO) source materials */
+/* Licensed Internal Code Source Materials */
+/* IBM HostBoot Licensed Internal Code */
+/* */
+/* The source code for this program is not published or otherwise */
+/* divested of its trade secrets, irrespective of what has been */
+/* deposited with the U.S. Copyright Office. */
+/* */
+/* Origin: 30 */
+/* */
+/* IBM_PROLOG_END_TAG */
+#ifndef __IBSCOM_IBSCOMIF_H
+#define __IBSCOM_IBSCOMIF_H
+
+#include <stdint.h>
+#include <builtins.h>
+#include <errl/errlentry.H>
+
+namespace IBSCOM
+{
+
+#define IBSCOM_DISABLE true
+#define IBSCOM_ENABLE false
+
+/**
+ * @brief Enable or disable Inband SCOMs on all capable chips
+ *
+ * Note that the flag is phrased as "disable": passing IBSCOM_DISABLE
+ * (true) turns inband scom off, passing IBSCOM_ENABLE (false) or
+ * omitting the argument turns it on.
+ *
+ * IBSCOM_DISABLE / IBSCOM_ENABLE are preprocessor macros, so they are
+ * not scoped by this namespace; callers spell them without the
+ * IBSCOM:: qualifier.
+ *
+ * @param i_disable true: disable inband scoms
+ *                  false (default): enable inband scoms
+ */
+void enableInbandScoms( bool i_disable = IBSCOM_ENABLE );
+
+}
+
+#endif
diff --git a/src/include/usr/ibscom/ibscomreasoncodes.H b/src/include/usr/ibscom/ibscomreasoncodes.H
index 2990cc56b..39927db41 100644
--- a/src/include/usr/ibscom/ibscomreasoncodes.H
+++ b/src/include/usr/ibscom/ibscomreasoncodes.H
@@ -46,6 +46,8 @@ namespace IBSCOM
IBSCOM_BUS_FAILURE = IBSCOM_COMP_ID | 0x06,
IBSCOM_INVALID_ADDRESS = IBSCOM_COMP_ID | 0x07,
IBSCOM_PIB_FAILURE = IBSCOM_COMP_ID | 0x08,
+ IBSCOM_WRONG_ERROR = IBSCOM_COMP_ID | 0x09,
+ IBSCOM_RETRY_DUE_TO_ERROR = IBSCOM_COMP_ID | 0x0A,
};
};
diff --git a/src/include/usr/xscom/piberror.H b/src/include/usr/xscom/piberror.H
index da5ee08e1..73512e961 100644
--- a/src/include/usr/xscom/piberror.H
+++ b/src/include/usr/xscom/piberror.H
@@ -33,14 +33,14 @@ namespace PIB
*/
enum
{
- PIB_NO_ERROR = 0,//0x00000000,
- PIB_RESOURCE_OCCUPIED = 1,//0x00000001,
- PIB_CHIPLET_OFFLINE = 2,//0x00000010,
- PIB_PARTIAL_GOOD = 3,//0x00000011,
- PIB_INVALID_ADDRESS = 4,//0x00000100,
- PIB_CLOCK_ERROR = 5,//0x00000101,
- PIB_PARITY_ERROR = 6,//0x00000110,
- PIB_TIMEOUT = 7,//0x00000111,
+ PIB_NO_ERROR = 0,//0b000,
+ PIB_RESOURCE_OCCUPIED = 1,//0b001,
+ PIB_CHIPLET_OFFLINE = 2,//0b010,
+ PIB_PARTIAL_GOOD = 3,//0b011,
+ PIB_INVALID_ADDRESS = 4,//0b100,
+ PIB_CLOCK_ERROR = 5,//0b101,
+ PIB_PARITY_ERROR = 6,//0b110,
+ PIB_TIMEOUT = 7,//0b111,
};
diff --git a/src/runtime/rt_stdlib.C b/src/runtime/rt_stdlib.C
index ec92badc6..ba1bc347a 100644
--- a/src/runtime/rt_stdlib.C
+++ b/src/runtime/rt_stdlib.C
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include <runtime/interface.h>
#include <string.h>
+#include <sys/time.h>
void* malloc(size_t s)
{
@@ -54,3 +55,8 @@ void* calloc(size_t num, size_t size)
return mem;
}
+
+/**
+ * @brief Runtime implementation of nanosleep
+ *
+ * Forwards the request to the sleep callback supplied by the host in
+ * g_hostInterfaces.  If the host did not provide the interface table
+ * or the sleep callback, the call returns immediately without
+ * sleeping rather than jumping through a NULL function pointer.
+ *
+ * @param[in] sec   Seconds to sleep
+ * @param[in] nsec  Additional nanoseconds to sleep
+ */
+void nanosleep( uint64_t sec, uint64_t nsec )
+{
+    if( g_hostInterfaces && g_hostInterfaces->sleep )
+    {
+        g_hostInterfaces->sleep(sec,nsec);
+    }
+}
diff --git a/src/usr/hwas/hostbootIstep.C b/src/usr/hwas/hostbootIstep.C
index 8e10c5de4..adeec948b 100644
--- a/src/usr/hwas/hostbootIstep.C
+++ b/src/usr/hwas/hostbootIstep.C
@@ -41,6 +41,7 @@
#include <targeting/attrsync.H>
#include <diag/prdf/prdfMain.H>
#include <intr/interrupt.H>
+#include <ibscom/ibscomif.H>
namespace HWAS
{
@@ -231,6 +232,9 @@ void* host_prd_hwreconfig( void *io_pArgs )
errlHndl_t errl = NULL;
+ // Flip the scom path back to FSI in case we enabled IBSCOM previously
+ IBSCOM::enableInbandScoms(IBSCOM_DISABLE);
+
// Call PRDF to remove non-function chips from its system model
errl = PRDF::refresh();
@@ -239,7 +243,7 @@ void* host_prd_hwreconfig( void *io_pArgs )
TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
"host_prd_hwreconfig ERROR 0x%.8X returned from"
" call to PRDF::refresh", errl->reasonCode());
- }
+ }
TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"host_prd_hwreconfig exit" );
diff --git a/src/usr/hwpf/hwp/dmi_training/dmi_training.C b/src/usr/hwpf/hwp/dmi_training/dmi_training.C
index cc680cb12..64c08030e 100644
--- a/src/usr/hwpf/hwp/dmi_training/dmi_training.C
+++ b/src/usr/hwpf/hwp/dmi_training/dmi_training.C
@@ -75,6 +75,7 @@
#include <erepairAccessorHwpFuncs.H>
#include "dmi_io_dccal/dmi_io_dccal.H"
#include <pbusLinkSvc.H>
+#include <ibscom/ibscomif.H>
namespace DMI_TRAINING
{
@@ -1244,49 +1245,7 @@ void* call_cen_set_inband_addr( void *io_pArgs )
}
//Now enable Inband SCOM for all membuf chips.
- TARGETING::TargetHandleList membufChips;
- getAllChips(membufChips, TYPE_MEMBUF, true);
-
- TARGETING::Target * sys = NULL;
- TARGETING::targetService().getTopLevelTarget(sys);
-
- for(uint32_t i=0; i<membufChips.size(); i++)
- {
- // If the membuf chip supports IBSCOM AND..
- // (Chip is >=DD20 OR IBSCOM Override is set)
- if ((membufChips[i]->getAttr<ATTR_PRIMARY_CAPABILITIES>()
- .supportsInbandScom) &&
- (// TODO: RTC 68984: Disable IBSCOM for now (membufChips[i]->getAttr<TARGETING::ATTR_EC>() >= 0x20) ||
- (sys->getAttr<TARGETING::ATTR_IBSCOM_ENABLE_OVERRIDE>() != 0))
- )
- {
- ScomSwitches l_switches =
- membufChips[i]->getAttr<ATTR_SCOM_SWITCHES>();
-
- // If Inband Scom is not already enabled.
- if ((l_switches.useInbandScom != 1) ||
- (l_switches.useFsiScom != 0))
- {
- l_switches.useFsiScom = 0;
- l_switches.useInbandScom = 1;
-
- // Turn off FSI scom and turn on Inband Scom.
- membufChips[i]->setAttr<ATTR_SCOM_SWITCHES>(l_switches);
-
- TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "Enable IBSCOM on target HUID %.8X",
- TARGETING::get_huid(membufChips[i]));
- }
- }
- else
- {
- TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "IBSCOM NOT enabled on target HUID %.8X",
- TARGETING::get_huid(membufChips[i]));
-
- }
- }
-
+ IBSCOM::enableInbandScoms();
}while(0);
TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
diff --git a/src/usr/i2c/i2c.C b/src/usr/i2c/i2c.C
index 5dbbf892f..8c2656f18 100755
--- a/src/usr/i2c/i2c.C
+++ b/src/usr/i2c/i2c.C
@@ -1383,6 +1383,7 @@ errlHndl_t i2cSetupMasters ( void )
mode.bit_rate_div = io_args.bit_rate_divisor;
+ size = sizeof(uint64_t);
err = deviceWrite( centList[centaur],
&mode.value,
size,
@@ -1490,6 +1491,7 @@ errlHndl_t i2cSetupMasters ( void )
mode.bit_rate_div = io_args.bit_rate_divisor;
+ size = sizeof(uint64_t);
err = deviceWrite( procList[proc],
&mode.value,
size,
diff --git a/src/usr/ibscom/ibscom.C b/src/usr/ibscom/ibscom.C
index 874f8f98c..abbc3f3c7 100644
--- a/src/usr/ibscom/ibscom.C
+++ b/src/usr/ibscom/ibscom.C
@@ -41,6 +41,9 @@
#include <limits.h>
#include <errl/errludtarget.H>
#include <xscom/piberror.H>
+#include <diag/attn/attn.H>
+#include <ibscom/ibscomif.H>
+#include <targeting/common/utilFilter.H>
// Easy macro replace for unit testing
//#define TRACUCOMP(args...) TRACFCOMP(args)
@@ -48,13 +51,16 @@
// Trace definition
trace_desc_t* g_trac_ibscom = NULL;
-TRAC_INIT(&g_trac_ibscom, "IBSCOM", KILOBYTE);
+TRAC_INIT(&g_trac_ibscom, IBSCOM_COMP_NAME, KILOBYTE);
using namespace ERRORLOG;
using namespace TARGETING;
namespace IBSCOM
{
+// SCOM Register addresses
+const uint32_t MBS_FIR = 0x02011400;
+const uint32_t MBSIBERR0 = 0x0201141B;
// Register XSCcom access functions to DD framework
DEVICE_REGISTER_ROUTE(DeviceFW::WILDCARD,
@@ -296,6 +302,109 @@ errlHndl_t getTargetVirtualAddress(Target* i_target,
return l_err;
}
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+/**
+ * @brief Best-effort reset of the inband error state using FSI scoms
+ *
+ * Zeroes MBSIBERR0 and clears the host inband read/write error bits
+ * (22 and 23) of MBS_FIR so that later inband operations do not trip
+ * over stale error data.  Failures are never returned to the caller:
+ * the first failing access is committed as a single informational log,
+ * carrying one marker register entry for every access that failed.
+ *
+ * @param[in] i_target  Target whose error registers are cleaned up
+ * @param[in] i_addr    Scom address of the failing operation
+ *                      (currently unused)
+ */
+void err_cleanup(Target* i_target,
+                 uint64_t i_addr)
+{
+    //Going to commit at most 1 informational error here
+    errlHndl_t l_err = NULL;
+    errlHndl_t tmp_err = NULL;
+    ERRORLOG::ErrlUserDetailsLogRegister l_logReg(i_target);
+
+    uint64_t zeroData = 0x0;
+    size_t op_size = sizeof(uint64_t);
+
+    // Clear out the status reg
+    tmp_err = deviceOp( DeviceFW::WRITE,
+                        i_target,
+                        &zeroData,
+                        op_size,
+                        DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
+    if(tmp_err)
+    {
+        // Keep only the first error, discard any later ones
+        if( l_err )
+        {
+            delete tmp_err;
+        }
+        else
+        {
+            l_err = tmp_err;
+        }
+        tmp_err = NULL;
+
+        //Really just want to save the address, so stick in some
+        //obvious dummy data
+        uint64_t dummyData = 0x00000000DEADBEEF;
+        l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+                               DEVICE_IBSCOM_ADDRESS(MBSIBERR0));
+    }
+
+    // Clear out the FIR bits we might trigger
+    uint64_t mbs_fir = 0;
+    op_size = sizeof(uint64_t);
+    tmp_err = deviceOp( DeviceFW::READ,
+                        i_target,
+                        &mbs_fir,
+                        op_size,
+                        DEVICE_FSISCOM_ADDRESS(MBS_FIR) );
+    // Remember whether mbs_fir holds real data before tmp_err is reused
+    const bool l_firValid = (NULL == tmp_err);
+    if(tmp_err)
+    {
+        if( l_err )
+        {
+            delete tmp_err;
+        }
+        else
+        {
+            l_err = tmp_err;
+        }
+        tmp_err = NULL;
+
+        //Really just want to save the address, so stick in some
+        //obvious dummy data
+        uint64_t dummyData = 0x10000000DEADBEEF;
+        l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+                               DEVICE_IBSCOM_ADDRESS(MBS_FIR));
+    }
+
+    // Only write the FIR back if we actually read it; otherwise we
+    // would be writing zeros and wiping out every other FIR bit.
+    if( l_firValid )
+    {
+        //22=MBS_FIR_MASK_REG_HOST_INBAND_READ_ERROR
+        //23=MBS_FIR_MASK_REG_HOST_INBAND_WRITE_ERROR
+        mbs_fir &= 0xFFFFFCFFFFFFFFFF;
+        op_size = sizeof(uint64_t);
+        // Result goes into tmp_err (not l_err) so that an error saved
+        // from an earlier step is neither leaked nor overwritten
+        tmp_err = deviceOp( DeviceFW::WRITE,
+                            i_target,
+                            &mbs_fir,
+                            op_size,
+                            DEVICE_FSISCOM_ADDRESS(MBS_FIR) );
+        if(tmp_err)
+        {
+            if( l_err )
+            {
+                delete tmp_err;
+            }
+            else
+            {
+                l_err = tmp_err;
+            }
+            tmp_err = NULL;
+
+            //Really just want to save the address, so stick in some
+            //obvious dummy data
+            uint64_t dummyData = 0x20000000DEADBEEF;
+            l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+                                   DEVICE_IBSCOM_ADDRESS(MBS_FIR));
+        }
+    }
+
+    if( l_err )
+    {
+        l_logReg.addToLog(l_err);
+
+        //force to informational so we don't log extra errors
+        //inside of possible error collection paths
+        l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+        errlCommit(l_err,IBSCOM_COMP_ID);
+        l_err = NULL;
+    }
+}
+
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
@@ -351,6 +460,35 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
l_mutex = i_target->getHbMutexAttr<TARGETING::ATTR_IBSCOM_MUTEX>();
mutex_lock(l_mutex);
need_unlock = true;
+
+        //Need to check if ibscom is still enabled before moving on in
+        //case we flipped the switch due to an error
+        ScomSwitches l_switches = i_target->getAttr<ATTR_SCOM_SWITCHES>();
+        if( !l_switches.useInbandScom )
+        {
+            TRACFCOMP(g_trac_ibscom, ERR_MRK"doIBScom> IBSCOM no longer enabled on %.8X, error must have occurred", get_huid(i_target));
+            /*@
+             * @errortype
+             * @moduleid     IBSCOM_DO_IBSCOM
+             * @reasoncode   IBSCOM_RETRY_DUE_TO_ERROR
+             * @userdata1[0:31]  HUID of Centaur Target
+             * @userdata1[32:64] SCOM Address
+             * @userdata2    Not Used
+             * @devdesc      Previous error disabled ibscom, so forcing
+             *               a retry via FSI
+             */
+            //Pack HUID and address into userdata1 so the log matches
+            //the layout documented in the tag above
+            l_err =
+                new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
+                              IBSCOM_DO_IBSCOM,
+                              IBSCOM_RETRY_DUE_TO_ERROR,
+                              TWO_UINT32_TO_UINT64(
+                                  get_huid(i_target),
+                                  i_addr),
+                              0);
+            //This error should NEVER get returned to caller, so it's a
+            //FW bug if it actually gets committed.
+            l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+                                       HWAS::SRCI_PRIORITY_HIGH);
+            break;
+        }
}
if (i_opType == DeviceFW::READ)
@@ -391,6 +529,8 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
//FW bug if it actually gets comitted.
l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
HWAS::SRCI_PRIORITY_HIGH);
+ ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target")
+ .addToLog(l_err);
break;
}
else
@@ -416,20 +556,19 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
l_virtAddr[i_addr] = l_data;
eieio();
+ //Workaround for HW264203
+ //A read of MBSIBWRSTAT will not trigger a SUE so we need to
+ //read the MBS_FIR instead.
TRACDCOMP(g_trac_ibscom,
- "doIBScom: Read MBSIBWRSTAT to check for error");
- //Read MBSIBWRSTAT to check for errors
- //If an error occured on last write, reading MBSIBWRSTAT will
- //trigger a SUE.
- const uint32_t MBSIBWRSTAT = 0x201141D;
- uint64_t statData = 0;
+ "doIBScom: Read MBS_FIR to check for error");
+ uint64_t fir_data = 0;
size_t readSize = sizeof(uint64_t);
l_err = doIBScom(DeviceFW::READ,
- i_target,
- &statData,
- readSize,
- MBSIBWRSTAT,
- true);
+ i_target,
+ &fir_data,
+ readSize,
+ MBS_FIR,
+ true);
if(l_err != NULL)
{
if( IBSCOM_SUE_IN_ERR_PATH == l_err->reasonCode() )
@@ -446,103 +585,122 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
break;
}
}
+ else
+ {
+ TRACUCOMP(g_trac_ibscom, "doIBScom: MBS_FIR=%.16X",fir_data);
+ //check the FIR bits specifically
+ //23 = MBS_FIR_MASK_REG_HOST_INBAND_WRITE_ERROR: A PIB error
+ // or inband buffer error was detected on a host inband
+ // write operation.
+ if( fir_data & 0x0000010000000000 )
+ {
+ TRACFCOMP(g_trac_ibscom, ERR_MRK" doIBScom: MBS_FIR[23] detected after write : %.16X", fir_data);
+ rw_error = true;
+ }
+ }
}
+ // Common error checking for both read and write
if(rw_error)
{
bool busDown = false;
TRACUCOMP(g_trac_ibscom,
"doIBScom: Get Error data, read MBSIBERR0");
- const uint32_t MBSIBERR0 = 0x201141B;
- const uint64_t HOST_ERROR_VALID = 0x0000000080000000;
- const uint64_t PIB_ERROR_STATUS_MASK = 0x0000000070000000;
- const uint64_t PIB_ERROR_SHIFT = 28;
- size_t readSize = sizeof(uint64_t);
- uint64_t mbsiberr0_data = 0;
+ size_t op_size = sizeof(uint64_t);
+
+ // Note: Using FSISCOM path to read the errors even though
+ // we could use IBSCOM in DD2 because it makes code simpler
+
+ MBSIBERRO_Reg_t mbsiberr0;
+ op_size = sizeof(uint64_t);
+ l_err = deviceOp( DeviceFW::READ,
+ i_target,
+ &(mbsiberr0.data),
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
+ if(l_err)
+ {
+ TRACFCOMP(g_trac_ibscom, ERR_MRK
+ "doIBScom: Error reading MBSIBERR0 over FSI");
+ //Save away the IBSCOM address
+ ERRORLOG::ErrlUserDetailsLogRegister l_logReg(i_target);
+ //Really just want to save the address, so stick in some
+ //obvious dummy data
+ uint64_t dummyData = 0x30000000DEADBEEF;
+ l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+ DEVICE_IBSCOM_ADDRESS(i_addr));
+ l_logReg.addToLog(l_err);
+
+ //force to informational so we don't log extra errors
+ //inside of possible error collection paths
+ l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+ errlCommit(l_err,IBSCOM_COMP_ID);
+ l_err = NULL;
+
+ //fabricate some error data
+ mbsiberr0.addr = i_addr;
+ mbsiberr0.errvalid = 1;
+ mbsiberr0.piberr = 0;
+ mbsiberr0.iswrite = (i_opType == DeviceFW::READ) ? 0 : 1;
+ mbsiberr0.reserved = 0xBADBAD;
+ }
+
+ TRACUCOMP(g_trac_ibscom,
+ "doIBScom: MBSIBERR0(0x%.16x) = 0x%.16X",
+ MBSIBERR0, mbsiberr0.data);
- //Use FSISCOM as workaround for DD1.x centaur chips (HW246298)
- if(i_target->getAttr<TARGETING::ATTR_EC>() < 0x20)
+ //if the MBSIBERR0Q_IB_HOST_ERROR_VALID bit is not set
+ // then we have a bus failure
+ if( !(mbsiberr0.errvalid) )
{
- //Need to explicitly use FSI SCOM in DD1X chips
- l_err = deviceOp( DeviceFW::READ,
- i_target,
- &mbsiberr0_data,
- readSize,
- DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
- if(l_err)
- {
- TRACFCOMP(g_trac_ibscom, ERR_MRK
- "doIBScom: Error reading MBSIBERR0 over FSI");
- //Save away the IBSCOM address
- ERRORLOG::ErrlUserDetailsLogRegister
- l_logReg(i_target);
- //Really just want to save the addres, so stick in some
- //obvious dummy data
- uint64_t dummyData = 0x00000000DEADBEEF;
- l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
- DEVICE_IBSCOM_ADDRESS(i_addr));
- l_logReg.addToLog(l_err);
- break;
- }
- TRACUCOMP(g_trac_ibscom,
- "doIBScom: MBSIBERR0(0x%.16x) = 0x%.16X",
- MBSIBERR0, mbsiberr0_data);
+ //Bus is down
+ busDown = true;
+ }
+ //confirm that we are looking at error data for the scom we did
+ //0:31 = MBSIBERR0Q_IB_HOST_ADDRESS: This is the 32 bit scom
+ // address that was being accessed when the error was detected.
+ else if( mbsiberr0.addr != i_addr )
+ {
+ TRACFCOMP( g_trac_ibscom, "doIBScom> The address in MBSIBERR0 (0x%.8X) doesn't match what we were scomming (0x%.8X)", mbsiberr0.addr, i_addr );
+ /*@
+ * @errortype
+ * @moduleid IBSCOM_DO_IBSCOM
+ * @reasoncode IBSCOM_WRONG_ERROR
+ * @userdata1[0:31] HUID of Centaur Target
+ * @userdata1[32:64] SCOM Address
+ * @userdata2 Contents of MBSIBERR0 register
+ * @devdesc Detected error doesn't match the address
+ * we failed on
+ */
+ l_err = new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
+ IBSCOM_DO_IBSCOM,
+ IBSCOM_WRONG_ERROR,
+ TWO_UINT32_TO_UINT64(
+ get_huid(i_target),
+ i_addr),
+ mbsiberr0.data);
+ // this would be a code bug because we got out of sync somehow
+ l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_HIGH );
+ ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target")
+ .addToLog(l_err);
+ ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target);
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR));
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0));
+ ffdc.addToLog(l_err);
+ l_err->collectTrace(IBSCOM_COMP_NAME);
//attempt to clear the error register so future accesses
//will work
- uint64_t zeroData = 0x0;
- readSize = sizeof(uint64_t);
- l_err = deviceOp( DeviceFW::WRITE,
- i_target,
- &zeroData,
- readSize,
- DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
- if(l_err )
- {
- errlCommit(l_err,IBSCOM_COMP_ID);
- l_err = NULL;
- }
+ err_cleanup(i_target,i_addr);
- //if the MBSIBERR0Q_IB_HOST_ERROR_VALID bit is not set
- // then we have a bus failure
- if( !(mbsiberr0_data & HOST_ERROR_VALID) )
- {
- //Bus is down
- busDown = true;
- }
+ break;
}
- else // >= DD20
- {
- //TODO RTC: 68984: Validate error path on DD2.0 Centaurs
- l_err = doIBScom(DeviceFW::READ,
- i_target,
- &mbsiberr0_data,
- readSize,
- MBSIBERR0,
- true);
- if(l_err != NULL)
- {
- if( IBSCOM_SUE_IN_ERR_PATH == l_err->reasonCode() )
- {
- TRACFCOMP(g_trac_ibscom, ERR_MRK
- "doIBScom: SUE on write detected");
- delete l_err;
- l_err = NULL;
- busDown = true;
- }
- else
- {
- TRACFCOMP(g_trac_ibscom, ERR_MRK"doIBScom: Unexpected error when checking for SUE");
- break;
- }
- }
- } // >= DD20
+
if(busDown)
{
- //TODO RTC: 69115 - call PRD to do FIR analysis, return PRD
- //error instead.
/*@
* @errortype
* @moduleid IBSCOM_DO_IBSCOM
@@ -553,19 +711,25 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
* @devdesc Bus failure when attempting to perform
* IBSCOM operation. IBSCOM disabled.
*/
- l_err =
+ errlHndl_t ib_err =
new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
IBSCOM_DO_IBSCOM,
IBSCOM_BUS_FAILURE,
TWO_UINT32_TO_UINT64(
get_huid(i_target),
i_addr),
- mbsiberr0_data);
+ mbsiberr0.data);
+
+ ib_err->addHwCallout(i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL);
- l_err->addHwCallout(i_target,
- HWAS::SRCI_PRIORITY_HIGH,
- HWAS::NO_DECONFIG,
- HWAS::GARD_NULL);
+                //grab some HW regs via FSISCOM
+                //Attach them to ib_err, the bus-failure log created
+                //above; l_err does not refer to that log yet and is
+                //overwritten further down.
+                ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target);
+                ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR));
+                ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0));
+                ffdc.addToLog(ib_err);
//disable IBSCOM
ScomSwitches l_switches =
@@ -581,6 +745,33 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
// Turn off IBSCOM and turn on FSI SCOM.
i_target->setAttr<ATTR_SCOM_SWITCHES>(l_switches);
}
+
+ //@todo: RTC:92971
+ //There is a potential deadlock if we call PRD here
+ //Look for a better PRD error
+ //errlHndl_t prd_err = ATTN::checkForIplAttentions();
+ errlHndl_t prd_err = NULL;
+ if( prd_err )
+ {
+ TRACFCOMP( g_trac_ibscom, ERR_MRK"Error from checkForIplAttentions : PLID=%X", prd_err->plid() );
+ //connect up the plids
+ ib_err->plid(prd_err->plid());
+ //commit my log as info because PRD's log is better
+ ib_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+ errlCommit(ib_err,IBSCOM_COMP_ID);
+ l_err = prd_err;
+ }
+ else
+ {
+ //my log is the only one
+ l_err = ib_err;
+ }
+
+ l_err->collectTrace(IBSCOM_COMP_NAME);
+
+ //Note-not cleaning up the error status here since
+ // we will not be using IBSCOM again
+
break;
}
else // bus isn't down, some other kind of error
@@ -597,23 +788,33 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
*/
l_err = new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
IBSCOM_DO_IBSCOM,
- IBSCOM_BUS_FAILURE,
+ IBSCOM_PIB_FAILURE,
TWO_UINT32_TO_UINT64(
get_huid(i_target),
i_addr),
- mbsiberr0_data);
+ mbsiberr0.data);
//Add this target to the FFDC
- ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_err);
-
- uint64_t pib_code =
- (mbsiberr0_data & PIB_ERROR_STATUS_MASK) >> PIB_ERROR_SHIFT;
+ ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target")
+ .addToLog(l_err);
//add callouts based on the PIB error
PIB::addFruCallouts( i_target,
- pib_code,
+ mbsiberr0.piberr,
l_err );
+ //grab some HW regs via FSISCOM
+ ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target);
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR));
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0));
+ ffdc.addToLog(l_err);
+
+ l_err->collectTrace(IBSCOM_COMP_NAME);
+
+ //attempt to clear the error register so future accesses
+ //will work
+ err_cleanup(i_target,i_addr);
+
break;
}
}
@@ -659,4 +860,70 @@ errlHndl_t ibscomPerformOp(DeviceFW::OperationType i_opType,
return l_err;
}
+
+/**
+ * @brief Enable or disable Inband SCOMs on all capable chips
+ *
+ * Walks the membuf chips and, for each one that supports inband scom
+ * and is either EC level 0x20 or later or covered by the
+ * IBSCOM_ENABLE_OVERRIDE system attribute, sets its SCOM_SWITCHES
+ * attribute to the requested path (inband or FSI).  The per-chip
+ * IBSCOM mutex is held while the attribute is inspected and updated.
+ *
+ * @param i_disable true: disable inband scoms (use FSI instead)
+ */
+void enableInbandScoms( bool i_disable )
+{
+    // The requested switch values are the same for every chip
+    const uint8_t l_wantInband = (i_disable == IBSCOM_DISABLE) ? 0 : 1;
+    const uint8_t l_wantFsi    = (i_disable == IBSCOM_DISABLE) ? 1 : 0;
+
+    TARGETING::TargetHandleList l_membufs;
+    TARGETING::getAllChips(l_membufs, TYPE_MEMBUF, true);
+
+    TARGETING::Target * l_sys = NULL;
+    TARGETING::targetService().getTopLevelTarget(l_sys);
+
+    const uint8_t l_override =
+        l_sys->getAttr<TARGETING::ATTR_IBSCOM_ENABLE_OVERRIDE>();
+    TRACFCOMP(g_trac_ibscom,"IBSCOM_ENABLE_OVERRIDE=%d",l_override);
+
+    for( size_t l_idx = 0; l_idx < l_membufs.size(); ++l_idx )
+    {
+        TARGETING::Target* l_chip = l_membufs[l_idx];
+
+        // Skip chips that cannot do inband scom at all
+        if( !(l_chip->getAttr<ATTR_PRIMARY_CAPABILITIES>()
+              .supportsInbandScom) )
+        {
+            continue;
+        }
+
+        // Skip pre-DD20 chips unless the override is set
+        if( (l_chip->getAttr<TARGETING::ATTR_EC>() < 0x20) &&
+            (l_override == 0) )
+        {
+            continue;
+        }
+
+        //don't mess with attributes without the mutex (just to be safe)
+        mutex_t* l_lock =
+            l_chip->getHbMutexAttr<TARGETING::ATTR_IBSCOM_MUTEX>();
+        mutex_lock(l_lock);
+
+        ScomSwitches l_current = l_chip->getAttr<ATTR_SCOM_SWITCHES>();
+
+        // Only touch the attribute if something actually changes
+        const bool l_alreadySet =
+            (l_current.useInbandScom == l_wantInband) &&
+            (l_current.useFsiScom == l_wantFsi);
+        if( !l_alreadySet )
+        {
+            l_current.useFsiScom = l_wantFsi;
+            l_current.useInbandScom = l_wantInband;
+            l_chip->setAttr<ATTR_SCOM_SWITCHES>(l_current);
+
+            TRACFCOMP(g_trac_ibscom,
+                      "IBSCOM=%d on target HUID %.8X",
+                      l_wantInband,
+                      TARGETING::get_huid(l_chip));
+        }
+
+        mutex_unlock(l_lock);
+    }
+}
+
+
} // end namespace
diff --git a/src/usr/ibscom/ibscom.H b/src/usr/ibscom/ibscom.H
index 3ba3c34f1..f1742b3e5 100644
--- a/src/usr/ibscom/ibscom.H
+++ b/src/usr/ibscom/ibscom.H
@@ -67,6 +67,24 @@ errlHndl_t ibscomPerformOp(DeviceFW::OperationType i_opType,
int64_t i_accessType,
va_list i_args);
+/**
+ * Bit definition for MBSIBERR0
+ *
+ * Overlays the raw 64-bit register value (data) with named fields so
+ * the error information can be picked apart without masks and shifts.
+ * Bit numbers below count from the most significant bit (bit 0).
+ *
+ * NOTE(review): the field positions assume the compiler allocates
+ * bit-fields starting at the most significant bit - confirm for any
+ * new toolchain or target.
+ *
+ * Note that the type name ends in the letter 'O', not the digit '0'.
+ */
+struct MBSIBERRO_Reg_t
+{
+ union
+ {
+ uint64_t data; //whole register as read or written via scom
+ struct
+ {
+ uint64_t addr:32; //0:31  MBSIBERR0Q_IB_HOST_ADDRESS
+ uint64_t errvalid:1; //32    MBSIBERR0Q_IB_HOST_ERROR_VALID
+ uint64_t piberr:3; //33:35 MBSIBERR0Q_IB_HOST_ERROR_STATUS
+ uint64_t iswrite:1; //36    MBSIBERR0Q_IB_HOST_WRITE_NOT_READ
+ uint64_t reserved:27; //37:63 remaining bits
+ };
+ };
+};
};
diff --git a/src/usr/ibscom/test/ibscomtest.H b/src/usr/ibscom/test/ibscomtest.H
index 44e2d7e66..0e623c771 100644
--- a/src/usr/ibscom/test/ibscomtest.H
+++ b/src/usr/ibscom/test/ibscomtest.H
@@ -35,6 +35,7 @@
#include <devicefw/userif.H>
#include <ibscom/ibscomreasoncodes.H>
#include <devicefw/driverif.H>
+#include <sys/time.h>
extern trace_desc_t* g_trac_ibscom;
@@ -86,26 +87,36 @@ class IBscomTest: public CxxTest::TestSuite
return;
}
l_testTarget = *(centaur_list.begin());
+ TRACFCOMP(g_trac_ibscom,"test_IBscom> Using target %.8X", TARGETING::get_huid(l_testTarget));
+
+ ScomSwitches l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+ if( !l_switches.useInbandScom )
+ {
+ TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom because ibscom is not enabled");
+ return;
+ }
TRACDCOMP(g_trac_ibscom,
- "IBscomTest::test_IBscom> Read orignal data from Centaur");
+ "IBscomTest::test_IBscom> Read original data from Centaur");
- const uint64_t addrs[] = {0x0201164F, 0x0301069A};
+ const uint64_t addrs[] = {0x02010803/*0:26*/, 0x03010403/*0:21*/};
uint64_t orig_data[2] = {0};
- uint64_t new_data[] = {0x1234567ABABABA00, 0xFEEDB0B0FEDCBA00};
+ uint64_t new_data1[] = {0x123456E000000000, 0xFEEDB00000000000};
uint64_t read_data[2] = {0};
uint64_t read_data_fsi[2] = {0};
size_t op_size = sizeof(uint64_t);
+ const uint64_t junk = 0x1122334455667788;
- //Save of initial register content
+ //Save off initial register content via FSI
for(uint32_t i=0; i<2; i++)
{
op_size = sizeof(uint64_t);
- l_err = deviceRead( l_testTarget,
- &orig_data[i],
- op_size,
- DEVICE_SCOM_ADDRESS(addrs[i]) );
+ l_err = deviceOp( DeviceFW::READ,
+ l_testTarget,
+ &orig_data[i],
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(addrs[i]) );
if( l_err )
{
TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Orig Read: Error from device : addr=0x%X, RC=%X",
@@ -116,17 +127,54 @@ class IBscomTest: public CxxTest::TestSuite
}
total++;
+ // OR in the original data so we don't clear mask bits
+ new_data1[i] |= orig_data[i];
+ }
+
+ //Read the data with IBSCOM
+ for(uint32_t i=0; i<2; i++)
+ {
+ TRACDCOMP(g_trac_ibscom,
+ "IBscomTest::test_IBscom> Read data");
+
+ //reset size
+ op_size = sizeof(uint64_t);
+ read_data[i] = junk;
+ l_err = deviceOp( DeviceFW::READ,
+ l_testTarget,
+ &read_data[i],
+ op_size,
+ DEVICE_IBSCOM_ADDRESS(addrs[i]) );
+ if( l_err )
+ {
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
+ addrs[i], l_err->reasonCode() );
+ TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" );
+ fails++;
+ errlCommit(l_err,IBSCOM_COMP_ID);
+ }
+
+ if(orig_data[i] != read_data[i])
+ {
+ TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read check." );
+
+                //Trace the two values that were actually compared
+                TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: FSI read data=0x%.16X, IBSCOM read data=0x%.16X",
+                          addrs[i], orig_data[i], read_data[i]);
+ fails++;
+ }
+
+ total++;
}
- //Write in some new data
+ //Write in some new data with IBSCOM
for(uint32_t i=0; i<2; i++)
{
- TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data[i]);
+ TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data1[i]);
op_size = sizeof(uint64_t);
l_err = deviceOp( DeviceFW::WRITE,
l_testTarget,
- &new_data[i],
+ &new_data1[i],
op_size,
DEVICE_IBSCOM_ADDRESS(addrs[i]) );
if( l_err )
@@ -137,11 +185,12 @@ class IBscomTest: public CxxTest::TestSuite
fails++;
errlCommit(l_err,IBSCOM_COMP_ID);
}
+ nanosleep( 0, 1000000 ); //sleep for 1ms
total++;
}
- //Read the data back with IBSCOM
+ //Read the data back with FSISCOM
for(uint32_t i=0; i<2; i++)
{
TRACDCOMP(g_trac_ibscom,
@@ -151,31 +200,56 @@ class IBscomTest: public CxxTest::TestSuite
op_size = sizeof(uint64_t);
l_err = deviceOp( DeviceFW::READ,
l_testTarget,
- &read_data[i],
+ &read_data_fsi[i],
op_size,
- DEVICE_IBSCOM_ADDRESS(addrs[i]) );
+ DEVICE_FSISCOM_ADDRESS(addrs[i]) );
if( l_err )
{
- TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
+ TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
addrs[i], l_err->reasonCode() );
- TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" );
+ TS_FAIL( "test_IBscom> ERROR : Error log from FSI Read" );
fails++;
errlCommit(l_err,IBSCOM_COMP_ID);
}
- if(new_data[i] != read_data[i])
+ if(new_data1[i] != read_data_fsi[i])
{
- TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read-back check." );
+ TS_FAIL( "test_IBscom> ERROR : Data miss-match on FSI read-back check." );
- TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X",
- addrs[i], new_data[i], read_data[i]);
+ TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, FSI read data=0x%.16X",
+ addrs[i], new_data1[i], read_data_fsi[i]);
+ fails++;
+ }
+ total++;
+ }
+
+ //Write in some new data with FSISCOM
+ uint64_t new_data2[] = {0xA5A5A50000000000/*0:26*/,
+ 0x1122000000000000/*0:21*/};
+ for(uint32_t i=0; i<2; i++)
+ {
+ TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data2[i]);
+
+ op_size = sizeof(uint64_t);
+ l_err = deviceOp( DeviceFW::WRITE,
+ l_testTarget,
+ &new_data2[i],
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(addrs[i]) );
+ if( l_err )
+ {
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write: Error from device : addr=0x%X, RC=%X",
+ addrs[i], l_err->reasonCode() );
+ TS_FAIL( "ScomTest::test_IBscom> ERROR : Error log from FSI Write" );
fails++;
+ errlCommit(l_err,IBSCOM_COMP_ID);
}
total++;
}
- //Read the data back with FSISCOM
+
+ //Read the data back with IBSCOM
for(uint32_t i=0; i<2; i++)
{
TRACDCOMP(g_trac_ibscom,
@@ -185,26 +259,27 @@ class IBscomTest: public CxxTest::TestSuite
op_size = sizeof(uint64_t);
l_err = deviceOp( DeviceFW::READ,
l_testTarget,
- &read_data_fsi[i],
+ &read_data[i],
op_size,
- DEVICE_FSISCOM_ADDRESS(addrs[i]) );
+ DEVICE_IBSCOM_ADDRESS(addrs[i]) );
if( l_err )
{
- TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
addrs[i], l_err->reasonCode() );
- TS_FAIL( "test_IBscom> ERROR : Error log from FSI Read" );
+ TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" );
fails++;
errlCommit(l_err,IBSCOM_COMP_ID);
}
- if(new_data[i] != read_data_fsi[i])
+ if(new_data2[i] != read_data[i])
{
- TS_FAIL( "test_IBscom> ERROR : Data miss-match on FSI read-back check." );
+ TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read-back check." );
- TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, FSI read data=0x%.16X",
- addrs[i], new_data[i], read_data_fsi[i]);
+                //new_data2 is what was written over FSI and compared above
+                TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X",
+                          addrs[i], new_data2[i], read_data[i]);
fails++;
}
+
total++;
}
@@ -237,7 +312,146 @@ class IBscomTest: public CxxTest::TestSuite
//TODO RTC: 72594: Add error path test cases when simics support
//is available
+ /**
+ * @brief Error-path test for inband scom (IBSCOM) accesses to a Centaur.
+ *
+ * The test is currently disabled: it traces a "skipping" message and
+ * returns immediately, so none of the code after the first return runs.
+ *
+ * When enabled, the body does the following against the first MEMBUF
+ * chip found under the master processor:
+ * - writes to a bad address and expects deviceOp to return an error log
+ * - reads from a bad address and expects deviceOp to return an error log
+ * - after each failure, checks that useInbandScom in ATTR_SCOM_SWITCHES
+ * is still set
+ * - reads a known-good address and expects no error log
+ */
+ void test_IBscom_error(void)
+ {
+ // Test is intentionally short-circuited; everything below is unreachable
+ // until this early return is removed.
+ TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom_error because Simics is broken");
+ return;
+
+ uint64_t fails = 0;
+ uint64_t total = 0;
+ errlHndl_t l_err = NULL;
+
+ TARGETING::Target* l_testTarget = NULL;
+
+ // Target: Find a Centaur on the Master processor
+ TARGETING::Target* l_procTarget = NULL;
+ TARGETING::targetService().masterProcChipTargetHandle(l_procTarget);
+ assert(l_procTarget != NULL);
+
+ // Predicate selecting chip-class MEMBUF targets of any model
+ TARGETING::PredicateCTM l_cent(TARGETING::CLASS_CHIP,
+ TARGETING::TYPE_MEMBUF,
+ TARGETING::MODEL_NA);
+ TARGETING::PredicatePostfixExpr cent_query;
+ cent_query.push(&l_cent);
+
+
+ TARGETING::TargetHandleList centaur_list;
+ TARGETING::targetService().
+ getAssociated(centaur_list,
+ l_procTarget,
+ TARGETING::TargetService::CHILD_BY_AFFINITY,
+ TARGETING::TargetService::ALL,
+ &cent_query);
+
+ if( centaur_list.size() < 1 )
+ {
+ TS_FAIL( "test_IBscom_error> ERROR : Unable to find a Centaur chip" );
+ return;
+ }
+ l_testTarget = *(centaur_list.begin());
+
+ // Nothing to test if inband scom is not enabled on this target
+ ScomSwitches l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+ if( !l_switches.useInbandScom )
+ {
+ TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom_error because ibscom is not enabled");
+ return;
+ }
+
+
+ TRACDCOMP(g_trac_ibscom, "IBscomTest::test_IBscom_error> Read orignal data from Centaur");
+
+
+ uint64_t new_data = 0x1234567ABABABA00;
+ uint64_t read_data = 0;
+ size_t op_size = sizeof(uint64_t);
+
+ // Address used for the final "things still work" read
+ const uint64_t good_addr = 0x02010803;
+
+ //Write a bad address
+ uint64_t bad_addr = 0x02123456;
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Write bad address 0x%.16X", bad_addr);
+ // op_size is passed to deviceOp as a non-const argument, so it is
+ // re-seeded before every operation
+ op_size = sizeof(uint64_t);
+ l_err = deviceOp( DeviceFW::WRITE,
+ l_testTarget,
+ &new_data,
+ op_size,
+ DEVICE_IBSCOM_ADDRESS(bad_addr) );
+ if( !l_err )
+ {
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> No error on bad address write" );
+ TS_FAIL( "ScomTest::test_IBscom_error> No error on bad address write" );
+ fails++;
+ }
+ else
+ {
+ // The error is the expected outcome; discard the log without committing
+ delete l_err;
+ }
+ total++;
+ nanosleep( 0, 1000000 ); //sleep for 1ms
+
+ //Verify ibscom is still enabled, i.e. we didn't think it was a bus fail
+ l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+ total++;
+ if( !l_switches.useInbandScom )
+ {
+ TS_FAIL( "ScomTest::test_IBscom_error> IBSCOM was wrongly disabled after bad address write" );
+ fails++;
+ }
+
+ //Read a bad address
+ bad_addr = 0x02876543;
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Read bad address 0x%.16X", bad_addr);
+ op_size = sizeof(uint64_t);
+ l_err = deviceOp( DeviceFW::READ,
+ l_testTarget,
+ &read_data,
+ op_size,
+ DEVICE_IBSCOM_ADDRESS(bad_addr) );
+ if( !l_err )
+ {
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> No error on bad address read" );
+ TS_FAIL( "ScomTest::test_IBscom_error> No error on bad address read" );
+ fails++;
+ }
+ else
+ {
+ // The error is the expected outcome; discard the log without committing
+ delete l_err;
+ }
+ total++;
+
+ //Verify ibscom is still enabled, i.e. we didn't think it was a bus fail
+ l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+ total++;
+ if( !l_switches.useInbandScom )
+ {
+ TS_FAIL( "ScomTest::test_IBscom_error> IBSCOM was wrongly disabled after bad address read" );
+ fails++;
+ }
+ //Read a good address to prove things still work
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Read good address 0x%.16X", good_addr);
+ op_size = sizeof(uint64_t);
+ l_err = deviceOp( DeviceFW::READ,
+ l_testTarget,
+ &read_data,
+ op_size,
+ DEVICE_IBSCOM_ADDRESS(good_addr) );
+ if( l_err )
+ {
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Error on read after fail" );
+ TS_FAIL( "ScomTest::test_IBscom_error> Error on read after fail" );
+ errlCommit(l_err,IBSCOM_COMP_ID);
+ fails++;
+ }
+ total++;
+
+ // NOTE(review): this message is emitted even when fails != 0 - confirm
+ // whether it should be conditional on the failure count
+ TS_TRACE("test_IBscom_error runs successfully!");
+ TRACFCOMP(g_trac_ibscom,
+ "IBscomTest::test_IBscom_error> %d/%d fails",
+ fails, total );
+
+ //TS_FAIL("FORCING ERROR TO STOP IPL");
+
+ return;
+ }
};
#endif
diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C
index 7b5ecaf27..b15f72406 100644
--- a/src/usr/scom/scom.C
+++ b/src/usr/scom/scom.C
@@ -5,7 +5,7 @@
/* */
/* IBM CONFIDENTIAL */
/* */
-/* COPYRIGHT International Business Machines Corp. 2011,2012 */
+/* COPYRIGHT International Business Machines Corp. 2011,2013 */
/* */
/* p1 */
/* */
@@ -36,12 +36,13 @@
#include <errl/errlmanager.H>
#include "scom.H"
#include <scom/scomreasoncodes.H>
+#include <ibscom/ibscomreasoncodes.H>
#include <sys/time.h>
// Trace definition
trace_desc_t* g_trac_scom = NULL;
-TRAC_INIT(&g_trac_scom, "SCOM", KILOBYTE, TRACE::BUFFER_SLOW); //1K
+TRAC_INIT(&g_trac_scom, SCOM_COMP_NAME, KILOBYTE, TRACE::BUFFER_SLOW); //1K
namespace SCOM
@@ -102,7 +103,6 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
bool l_indScomError = false;
uint64_t temp_io_buffer = 0;
- //@todo - determine hwhat an appropriate timeout value
enum { MAX_INDSCOM_TIMEOUT_NS = 100000 }; //=.1ms
// If the indirect scom bit is 0, then doing a regular scom
@@ -213,8 +213,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
break;
}
- //TODO tmp remove for VPO, need better polling strategy -- RTC43738
- //nanosleep( 0, 10000 ); //sleep for 10,000 ns
+ nanosleep( 0, 10000 ); //sleep for 10,000 ns
elapsed_indScom_time_ns += 10000;
}while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS);
@@ -336,8 +335,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
}
- //TODO tmp remove for VPO, need better polling strategy -- RTC43738
- //nanosleep( 0, 10000 ); //sleep for 10,000 ns
+ nanosleep( 0, 10000 ); //sleep for 10,000 ns
elapsed_indScom_time_ns += 10000;
}while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS);
@@ -464,6 +462,19 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
}while(0);
+ //Look for special retry codes
+ if( l_err
+ && (0xFFFFFFFF != i_accessType)
+ && (l_err->reasonCode() == IBSCOM::IBSCOM_RETRY_DUE_TO_ERROR) )
+ {
+ delete l_err;
+ TRACFCOMP(g_trac_scom, "Forcing retry of Scom to %.16X on %.8X", i_addr, TARGETING::get_huid(i_target));
+ // use the unused i_accessType parameter to avoid an infinite recursion
+ int64_t accessType_flag = 0xFFFFFFFF;
+ l_err = doScomOp( i_opType, i_target, io_buffer,
+ io_buflen, accessType_flag, i_addr );
+ }
+
return l_err;
}
diff --git a/src/usr/testcore/rtloader/loader.H b/src/usr/testcore/rtloader/loader.H
index ea36f1126..03e8e5f39 100644
--- a/src/usr/testcore/rtloader/loader.H
+++ b/src/usr/testcore/rtloader/loader.H
@@ -32,6 +32,7 @@
#include <errl/errlmanager.H>
#include <util/utillidmgr.H>
#include <map>
+#include <sys/time.h>
#include <runtime/interface.h>
#include <vpd/vpd_if.H>
@@ -105,6 +106,7 @@ class RuntimeLoaderTest : public CxxTest::TestSuite
intf->malloc = malloc;
intf->free = free;
intf->realloc = realloc;
+ intf->sleep = nanosleep;
intf->assert = rt_assert;
intf->sendErrorLog = rt_logErr;
intf->scom_read = rt_scom_read;
diff --git a/src/usr/xscom/piberror.C b/src/usr/xscom/piberror.C
index 366fd2579..0ffe42a2b 100644
--- a/src/usr/xscom/piberror.C
+++ b/src/usr/xscom/piberror.C
@@ -53,6 +53,15 @@ void addFruCallouts(TARGETING::Target* i_target,
switch (i_pibErrStatus)
{
case PIB::PIB_CHIPLET_OFFLINE:
+ //Offline should just be a code bug, but it seems that there are
+ // cases where bad hardware can also cause this problem
+ io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ io_errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL );
+ break;
case PIB::PIB_PARTIAL_GOOD:
case PIB::PIB_INVALID_ADDRESS:
io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
OpenPOWER on IntegriCloud