summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Crowell <dcrowell@us.ibm.com>2013-11-13 09:32:01 -0600
committerA. Patrick Williams III <iawillia@us.ibm.com>2013-12-12 16:27:50 -0600
commit948809ead9262fe86e78e303f3e5d1a19388f7ee (patch)
treef96f47384b95e865c9a324747f687f7cc0ca8d59
parent07c9716fcf31ce5352ce412a643071ea89bd2ca1 (diff)
downloadtalos-hostboot-948809ead9262fe86e78e303f3e5d1a19388f7ee.tar.gz
talos-hostboot-948809ead9262fe86e78e303f3e5d1a19388f7ee.zip
IBSCOM Enablement and Error Handling
Enable IBSCOM for Centaur DD2.x chips. Add appropriate error handling. Flip scom to FSI after IBSCOM fail. Enable reconfig loop cleanup. Code verified on hardware. Change-Id: I394789b900e5779dded78dd5fbcc9b9257e856fb RTC: 69115 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/7410 Tested-by: Jenkins Server Reviewed-by: Michael Baiocchi <baiocchi@us.ibm.com> Reviewed-by: ADAM R. MUHLE <armuhle@us.ibm.com> Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
-rwxr-xr-xsrc/build/tools/listdeps.pl1
-rw-r--r--src/include/runtime/interface.h2
-rw-r--r--src/include/stdlib.h2
-rw-r--r--src/include/usr/ibscom/ibscomif.H45
-rw-r--r--src/include/usr/ibscom/ibscomreasoncodes.H2
-rw-r--r--src/include/usr/xscom/piberror.H16
-rw-r--r--src/runtime/rt_stdlib.C6
-rw-r--r--src/usr/hwas/hostbootIstep.C6
-rw-r--r--src/usr/hwpf/hwp/dmi_training/dmi_training.C45
-rwxr-xr-xsrc/usr/i2c/i2c.C2
-rw-r--r--src/usr/ibscom/ibscom.C475
-rw-r--r--src/usr/ibscom/ibscom.H18
-rw-r--r--src/usr/ibscom/test/ibscomtest.H272
-rw-r--r--src/usr/scom/scom.C25
-rw-r--r--src/usr/testcore/rtloader/loader.H2
-rw-r--r--src/usr/xscom/piberror.C9
16 files changed, 735 insertions, 193 deletions
diff --git a/src/build/tools/listdeps.pl b/src/build/tools/listdeps.pl
index 2fc1df199..29087c828 100755
--- a/src/build/tools/listdeps.pl
+++ b/src/build/tools/listdeps.pl
@@ -201,6 +201,7 @@ my %resident_modules = (
"libattn.so" => '1',
"libi2c.so" => '1',
"libutil.so" => '1',
+ "libibscom.so" => '1',
);
diff --git a/src/include/runtime/interface.h b/src/include/runtime/interface.h
index 0b7382b61..dfe6a93d6 100644
--- a/src/include/runtime/interface.h
+++ b/src/include/runtime/interface.h
@@ -58,6 +58,8 @@ typedef struct hostInterfaces
void (*free)(void*);
/** realloc */
void* (*realloc)(void*, size_t);
+ /** sleep */
+ void (*sleep)(uint64_t sec, uint64_t nsec);
/** sendErrorLog
* @param[in] plid Platform Log identifier
diff --git a/src/include/stdlib.h b/src/include/stdlib.h
index 36d2c147c..c2b0615d8 100644
--- a/src/include/stdlib.h
+++ b/src/include/stdlib.h
@@ -5,7 +5,7 @@
/* */
/* IBM CONFIDENTIAL */
/* */
-/* COPYRIGHT International Business Machines Corp. 2010,2012 */
+/* COPYRIGHT International Business Machines Corp. 2010,2013 */
/* */
/* p1 */
/* */
diff --git a/src/include/usr/ibscom/ibscomif.H b/src/include/usr/ibscom/ibscomif.H
new file mode 100644
index 000000000..65304acc6
--- /dev/null
+++ b/src/include/usr/ibscom/ibscomif.H
@@ -0,0 +1,45 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/include/usr/ibscom/ibscomif.H $ */
+/* */
+/* IBM CONFIDENTIAL */
+/* */
+/* COPYRIGHT International Business Machines Corp. 2013 */
+/* */
+/* p1 */
+/* */
+/* Object Code Only (OCO) source materials */
+/* Licensed Internal Code Source Materials */
+/* IBM HostBoot Licensed Internal Code */
+/* */
+/* The source code for this program is not published or otherwise */
+/* divested of its trade secrets, irrespective of what has been */
+/* deposited with the U.S. Copyright Office. */
+/* */
+/* Origin: 30 */
+/* */
+/* IBM_PROLOG_END_TAG */
+#ifndef __IBSCOM_IBSCOMIF_H
+#define __IBSCOM_IBSCOMIF_H
+
+#include <stdint.h>
+#include <builtins.h>
+#include <errl/errlentry.H>
+
+namespace IBSCOM
+{
+
+#define IBSCOM_DISABLE true
+#define IBSCOM_ENABLE false
+
+/**
+ * @brief Enable or disable Inband SCOMs on all capable chips
+ * @note IBSCOM_DISABLE/IBSCOM_ENABLE are macros (not scoped by this namespace) meant to be used as the argument values
+ * @param i_disable IBSCOM_DISABLE (true): disable inband scoms; IBSCOM_ENABLE (false, the default): enable them
+ */
+void enableInbandScoms( bool i_disable = IBSCOM_ENABLE );
+
+}
+
+#endif
diff --git a/src/include/usr/ibscom/ibscomreasoncodes.H b/src/include/usr/ibscom/ibscomreasoncodes.H
index 2990cc56b..39927db41 100644
--- a/src/include/usr/ibscom/ibscomreasoncodes.H
+++ b/src/include/usr/ibscom/ibscomreasoncodes.H
@@ -46,6 +46,8 @@ namespace IBSCOM
IBSCOM_BUS_FAILURE = IBSCOM_COMP_ID | 0x06,
IBSCOM_INVALID_ADDRESS = IBSCOM_COMP_ID | 0x07,
IBSCOM_PIB_FAILURE = IBSCOM_COMP_ID | 0x08,
+ IBSCOM_WRONG_ERROR = IBSCOM_COMP_ID | 0x09,
+ IBSCOM_RETRY_DUE_TO_ERROR = IBSCOM_COMP_ID | 0x0A,
};
};
diff --git a/src/include/usr/xscom/piberror.H b/src/include/usr/xscom/piberror.H
index da5ee08e1..73512e961 100644
--- a/src/include/usr/xscom/piberror.H
+++ b/src/include/usr/xscom/piberror.H
@@ -33,14 +33,14 @@ namespace PIB
*/
enum
{
- PIB_NO_ERROR = 0,//0x00000000,
- PIB_RESOURCE_OCCUPIED = 1,//0x00000001,
- PIB_CHIPLET_OFFLINE = 2,//0x00000010,
- PIB_PARTIAL_GOOD = 3,//0x00000011,
- PIB_INVALID_ADDRESS = 4,//0x00000100,
- PIB_CLOCK_ERROR = 5,//0x00000101,
- PIB_PARITY_ERROR = 6,//0x00000110,
- PIB_TIMEOUT = 7,//0x00000111,
+ PIB_NO_ERROR = 0,//0b000,
+ PIB_RESOURCE_OCCUPIED = 1,//0b001,
+ PIB_CHIPLET_OFFLINE = 2,//0b010,
+ PIB_PARTIAL_GOOD = 3,//0b011,
+ PIB_INVALID_ADDRESS = 4,//0b100,
+ PIB_CLOCK_ERROR = 5,//0b101,
+ PIB_PARITY_ERROR = 6,//0b110,
+ PIB_TIMEOUT = 7,//0b111,
};
diff --git a/src/runtime/rt_stdlib.C b/src/runtime/rt_stdlib.C
index ec92badc6..ba1bc347a 100644
--- a/src/runtime/rt_stdlib.C
+++ b/src/runtime/rt_stdlib.C
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include <runtime/interface.h>
#include <string.h>
+#include <sys/time.h>
void* malloc(size_t s)
{
@@ -54,3 +55,8 @@ void* calloc(size_t num, size_t size)
return mem;
}
+
+void nanosleep( uint64_t sec, uint64_t nsec ) // runtime build of nanosleep: no local timing, just forwards the request
+{
+ g_hostInterfaces->sleep(sec,nsec); // host-provided callback; NOTE(review): called without checking the pointer is populated
+}
diff --git a/src/usr/hwas/hostbootIstep.C b/src/usr/hwas/hostbootIstep.C
index 8e10c5de4..adeec948b 100644
--- a/src/usr/hwas/hostbootIstep.C
+++ b/src/usr/hwas/hostbootIstep.C
@@ -41,6 +41,7 @@
#include <targeting/attrsync.H>
#include <diag/prdf/prdfMain.H>
#include <intr/interrupt.H>
+#include <ibscom/ibscomif.H>
namespace HWAS
{
@@ -231,6 +232,9 @@ void* host_prd_hwreconfig( void *io_pArgs )
errlHndl_t errl = NULL;
+ // Flip the scom path back to FSI in case we enabled IBSCOM previously
+ IBSCOM::enableInbandScoms(IBSCOM_DISABLE);
+
// Call PRDF to remove non-function chips from its system model
errl = PRDF::refresh();
@@ -239,7 +243,7 @@ void* host_prd_hwreconfig( void *io_pArgs )
TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
"host_prd_hwreconfig ERROR 0x%.8X returned from"
" call to PRDF::refresh", errl->reasonCode());
- }
+ }
TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"host_prd_hwreconfig exit" );
diff --git a/src/usr/hwpf/hwp/dmi_training/dmi_training.C b/src/usr/hwpf/hwp/dmi_training/dmi_training.C
index cc680cb12..64c08030e 100644
--- a/src/usr/hwpf/hwp/dmi_training/dmi_training.C
+++ b/src/usr/hwpf/hwp/dmi_training/dmi_training.C
@@ -75,6 +75,7 @@
#include <erepairAccessorHwpFuncs.H>
#include "dmi_io_dccal/dmi_io_dccal.H"
#include <pbusLinkSvc.H>
+#include <ibscom/ibscomif.H>
namespace DMI_TRAINING
{
@@ -1244,49 +1245,7 @@ void* call_cen_set_inband_addr( void *io_pArgs )
}
//Now enable Inband SCOM for all membuf chips.
- TARGETING::TargetHandleList membufChips;
- getAllChips(membufChips, TYPE_MEMBUF, true);
-
- TARGETING::Target * sys = NULL;
- TARGETING::targetService().getTopLevelTarget(sys);
-
- for(uint32_t i=0; i<membufChips.size(); i++)
- {
- // If the membuf chip supports IBSCOM AND..
- // (Chip is >=DD20 OR IBSCOM Override is set)
- if ((membufChips[i]->getAttr<ATTR_PRIMARY_CAPABILITIES>()
- .supportsInbandScom) &&
- (// TODO: RTC 68984: Disable IBSCOM for now (membufChips[i]->getAttr<TARGETING::ATTR_EC>() >= 0x20) ||
- (sys->getAttr<TARGETING::ATTR_IBSCOM_ENABLE_OVERRIDE>() != 0))
- )
- {
- ScomSwitches l_switches =
- membufChips[i]->getAttr<ATTR_SCOM_SWITCHES>();
-
- // If Inband Scom is not already enabled.
- if ((l_switches.useInbandScom != 1) ||
- (l_switches.useFsiScom != 0))
- {
- l_switches.useFsiScom = 0;
- l_switches.useInbandScom = 1;
-
- // Turn off FSI scom and turn on Inband Scom.
- membufChips[i]->setAttr<ATTR_SCOM_SWITCHES>(l_switches);
-
- TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "Enable IBSCOM on target HUID %.8X",
- TARGETING::get_huid(membufChips[i]));
- }
- }
- else
- {
- TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "IBSCOM NOT enabled on target HUID %.8X",
- TARGETING::get_huid(membufChips[i]));
-
- }
- }
-
+ IBSCOM::enableInbandScoms();
}while(0);
TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
diff --git a/src/usr/i2c/i2c.C b/src/usr/i2c/i2c.C
index 5dbbf892f..8c2656f18 100755
--- a/src/usr/i2c/i2c.C
+++ b/src/usr/i2c/i2c.C
@@ -1383,6 +1383,7 @@ errlHndl_t i2cSetupMasters ( void )
mode.bit_rate_div = io_args.bit_rate_divisor;
+ size = sizeof(uint64_t);
err = deviceWrite( centList[centaur],
&mode.value,
size,
@@ -1490,6 +1491,7 @@ errlHndl_t i2cSetupMasters ( void )
mode.bit_rate_div = io_args.bit_rate_divisor;
+ size = sizeof(uint64_t);
err = deviceWrite( procList[proc],
&mode.value,
size,
diff --git a/src/usr/ibscom/ibscom.C b/src/usr/ibscom/ibscom.C
index 874f8f98c..abbc3f3c7 100644
--- a/src/usr/ibscom/ibscom.C
+++ b/src/usr/ibscom/ibscom.C
@@ -41,6 +41,9 @@
#include <limits.h>
#include <errl/errludtarget.H>
#include <xscom/piberror.H>
+#include <diag/attn/attn.H>
+#include <ibscom/ibscomif.H>
+#include <targeting/common/utilFilter.H>
// Easy macro replace for unit testing
//#define TRACUCOMP(args...) TRACFCOMP(args)
@@ -48,13 +51,16 @@
// Trace definition
trace_desc_t* g_trac_ibscom = NULL;
-TRAC_INIT(&g_trac_ibscom, "IBSCOM", KILOBYTE);
+TRAC_INIT(&g_trac_ibscom, IBSCOM_COMP_NAME, KILOBYTE);
using namespace ERRORLOG;
using namespace TARGETING;
namespace IBSCOM
{
+// SCOM Register addresses
+const uint32_t MBS_FIR = 0x02011400;
+const uint32_t MBSIBERR0 = 0x0201141B;
// Register XSCcom access functions to DD framework
DEVICE_REGISTER_ROUTE(DeviceFW::WILDCARD,
@@ -296,6 +302,109 @@ errlHndl_t getTargetVirtualAddress(Target* i_target,
return l_err;
}
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+void err_cleanup(Target* i_target, // chip to clean up; every access below uses the FSI SCOM path
+ uint64_t i_addr) // SCOM address that failed (not referenced in this body)
+{
+ //Clears MBSIBERR0 and MBS_FIR[22:23]; going to commit at most 1 informational error here (first failure is kept, later ones are deleted)
+ errlHndl_t l_err = NULL;
+ errlHndl_t tmp_err = NULL;
+ ERRORLOG::ErrlUserDetailsLogRegister l_logReg(i_target);
+
+ uint64_t zeroData = 0x0;
+ size_t op_size = sizeof(uint64_t);
+
+ // Clear out the status reg
+ op_size = sizeof(uint64_t);
+ tmp_err = deviceOp( DeviceFW::WRITE,
+ i_target,
+ &zeroData,
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
+ if(tmp_err)
+ {
+ if( l_err )
+ {
+ delete tmp_err;
+ }
+ else
+ {
+ l_err = tmp_err;
+ }
+
+ //Really just want to save the address, so stick in some
+ //obvious dummy data
+ uint64_t dummyData = 0x00000000DEADBEEF;
+ l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+ DEVICE_IBSCOM_ADDRESS(MBSIBERR0));
+ }
+
+ // Clear out the FIR bits we might trigger
+ uint64_t mbs_fir = 0;
+ op_size = sizeof(uint64_t);
+ tmp_err = deviceOp( DeviceFW::READ,
+ i_target,
+ &mbs_fir,
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(MBS_FIR) );
+ if(tmp_err)
+ {
+ if( l_err )
+ {
+ delete tmp_err;
+ }
+ else
+ {
+ l_err = tmp_err;
+ }
+
+ //Really just want to save the address, so stick in some
+ //obvious dummy data
+ uint64_t dummyData = 0x10000000DEADBEEF;
+ l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+ DEVICE_IBSCOM_ADDRESS(MBS_FIR));
+ }
+
+ //22=MBS_FIR_MASK_REG_HOST_INBAND_READ_ERROR
+ //23=MBS_FIR_MASK_REG_HOST_INBAND_WRITE_ERROR
+ mbs_fir &= 0xFFFFFCFFFFFFFFFF; // NOTE(review): the write-back below still happens when the read above failed
+ op_size = sizeof(uint64_t);
+ tmp_err = deviceOp( DeviceFW::WRITE, // result must land in tmp_err: an error already saved in l_err must not be overwritten
+ i_target,
+ &mbs_fir,
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(MBS_FIR) );
+ if(tmp_err)
+ {
+ if( l_err )
+ {
+ delete tmp_err;
+ }
+ else
+ {
+ l_err = tmp_err;
+ }
+
+ //Really just want to save the address, so stick in some
+ //obvious dummy data
+ uint64_t dummyData = 0x20000000DEADBEEF;
+ l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+ DEVICE_IBSCOM_ADDRESS(MBS_FIR));
+ }
+
+ if( l_err )
+ {
+ l_logReg.addToLog(l_err);
+
+ //force to informational so we don't log extra errors
+ //inside of possible error collection paths
+ l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+ errlCommit(l_err,IBSCOM_COMP_ID);
+ l_err = NULL;
+ }
+}
+
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
@@ -351,6 +460,35 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
l_mutex = i_target->getHbMutexAttr<TARGETING::ATTR_IBSCOM_MUTEX>();
mutex_lock(l_mutex);
need_unlock = true;
+
+ //Need to check if ibscom is still enabled before moving on in
+ //case we flipped the switch due to an error
+ ScomSwitches l_switches = i_target->getAttr<ATTR_SCOM_SWITCHES>();
+ if( !l_switches.useInbandScom )
+ {
+ TRACFCOMP(g_trac_ibscom, ERR_MRK"doIBScom> IBSCOM longer enabled on %.8X, error must have occurred", get_huid(i_target));
+ /*@
+ * @errortype
+ * @moduleid IBSCOM_DO_IBSCOM
+ * @reasoncode IBSCOM_RETRY_DUE_TO_ERROR
+ * @userdata1[0:31] HUID of Centaur Target
+ * @userdata1[32:64] SCOM Address
+ * @userdata2 Not Used
+ * @devdesc Previous error disabled ibscom, so forcing
+ * a retry via FSI
+ */
+ l_err =
+ new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
+ IBSCOM_DO_IBSCOM,
+ IBSCOM_RETRY_DUE_TO_ERROR,
+ get_huid(i_target),
+ i_addr);
+ //This error should NEVER get returned to caller, so it's a
+ //FW bug if it actually gets comitted.
+ l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ break;
+ }
}
if (i_opType == DeviceFW::READ)
@@ -391,6 +529,8 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
//FW bug if it actually gets comitted.
l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
HWAS::SRCI_PRIORITY_HIGH);
+ ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target")
+ .addToLog(l_err);
break;
}
else
@@ -416,20 +556,19 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
l_virtAddr[i_addr] = l_data;
eieio();
+ //Workaround for HW264203
+ //A read of MBSIBWRSTAT will not trigger a SUE so we need to
+ //read the MBS_FIR instead.
TRACDCOMP(g_trac_ibscom,
- "doIBScom: Read MBSIBWRSTAT to check for error");
- //Read MBSIBWRSTAT to check for errors
- //If an error occured on last write, reading MBSIBWRSTAT will
- //trigger a SUE.
- const uint32_t MBSIBWRSTAT = 0x201141D;
- uint64_t statData = 0;
+ "doIBScom: Read MBS_FIR to check for error");
+ uint64_t fir_data = 0;
size_t readSize = sizeof(uint64_t);
l_err = doIBScom(DeviceFW::READ,
- i_target,
- &statData,
- readSize,
- MBSIBWRSTAT,
- true);
+ i_target,
+ &fir_data,
+ readSize,
+ MBS_FIR,
+ true);
if(l_err != NULL)
{
if( IBSCOM_SUE_IN_ERR_PATH == l_err->reasonCode() )
@@ -446,103 +585,122 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
break;
}
}
+ else
+ {
+ TRACUCOMP(g_trac_ibscom, "doIBScom: MBS_FIR=%.16X",fir_data);
+ //check the FIR bits specifically
+ //23 = MBS_FIR_MASK_REG_HOST_INBAND_WRITE_ERROR: A PIB error
+ // or inband buffer error was detected on a host inband
+ // write operation.
+ if( fir_data & 0x0000010000000000 )
+ {
+ TRACFCOMP(g_trac_ibscom, ERR_MRK" doIBScom: MBS_FIR[23] detected after write : %.16X", fir_data);
+ rw_error = true;
+ }
+ }
}
+ // Common error checking for both read and write
if(rw_error)
{
bool busDown = false;
TRACUCOMP(g_trac_ibscom,
"doIBScom: Get Error data, read MBSIBERR0");
- const uint32_t MBSIBERR0 = 0x201141B;
- const uint64_t HOST_ERROR_VALID = 0x0000000080000000;
- const uint64_t PIB_ERROR_STATUS_MASK = 0x0000000070000000;
- const uint64_t PIB_ERROR_SHIFT = 28;
- size_t readSize = sizeof(uint64_t);
- uint64_t mbsiberr0_data = 0;
+ size_t op_size = sizeof(uint64_t);
+
+ // Note: Using FSISCOM path to read the errors even though
+ // we could use IBSCOM in DD2 because it makes code simpler
+
+ MBSIBERRO_Reg_t mbsiberr0;
+ op_size = sizeof(uint64_t);
+ l_err = deviceOp( DeviceFW::READ,
+ i_target,
+ &(mbsiberr0.data),
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
+ if(l_err)
+ {
+ TRACFCOMP(g_trac_ibscom, ERR_MRK
+ "doIBScom: Error reading MBSIBERR0 over FSI");
+ //Save away the IBSCOM address
+ ERRORLOG::ErrlUserDetailsLogRegister l_logReg(i_target);
+ //Really just want to save the address, so stick in some
+ //obvious dummy data
+ uint64_t dummyData = 0x30000000DEADBEEF;
+ l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
+ DEVICE_IBSCOM_ADDRESS(i_addr));
+ l_logReg.addToLog(l_err);
+
+ //force to informational so we don't log extra errors
+ //inside of possible error collection paths
+ l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+ errlCommit(l_err,IBSCOM_COMP_ID);
+ l_err = NULL;
+
+ //fabricate some error data
+ mbsiberr0.addr = i_addr;
+ mbsiberr0.errvalid = 1;
+ mbsiberr0.piberr = 0;
+ mbsiberr0.iswrite = (i_opType == DeviceFW::READ) ? 0 : 1;
+ mbsiberr0.reserved = 0xBADBAD;
+ }
+
+ TRACUCOMP(g_trac_ibscom,
+ "doIBScom: MBSIBERR0(0x%.16x) = 0x%.16X",
+ MBSIBERR0, mbsiberr0.data);
- //Use FSISCOM as workaround for DD1.x centaur chips (HW246298)
- if(i_target->getAttr<TARGETING::ATTR_EC>() < 0x20)
+ //if the MBSIBERR0Q_IB_HOST_ERROR_VALID bit is not set
+ // then we have a bus failure
+ if( !(mbsiberr0.errvalid) )
{
- //Need to explicitly use FSI SCOM in DD1X chips
- l_err = deviceOp( DeviceFW::READ,
- i_target,
- &mbsiberr0_data,
- readSize,
- DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
- if(l_err)
- {
- TRACFCOMP(g_trac_ibscom, ERR_MRK
- "doIBScom: Error reading MBSIBERR0 over FSI");
- //Save away the IBSCOM address
- ERRORLOG::ErrlUserDetailsLogRegister
- l_logReg(i_target);
- //Really just want to save the addres, so stick in some
- //obvious dummy data
- uint64_t dummyData = 0x00000000DEADBEEF;
- l_logReg.addDataBuffer(&dummyData, sizeof(dummyData),
- DEVICE_IBSCOM_ADDRESS(i_addr));
- l_logReg.addToLog(l_err);
- break;
- }
- TRACUCOMP(g_trac_ibscom,
- "doIBScom: MBSIBERR0(0x%.16x) = 0x%.16X",
- MBSIBERR0, mbsiberr0_data);
+ //Bus is down
+ busDown = true;
+ }
+ //confirm that we are looking at error data for the scom we did
+ //0:31 = MBSIBERR0Q_IB_HOST_ADDRESS: This is the 32 bit scom
+ // address that was being accessed when the error was detected.
+ else if( mbsiberr0.addr != i_addr )
+ {
+ TRACFCOMP( g_trac_ibscom, "doIBScom> The address in MBSIBERR0 (0x%.8X) doesn't match what we were scomming (0x%.8X)", mbsiberr0.addr, i_addr );
+ /*@
+ * @errortype
+ * @moduleid IBSCOM_DO_IBSCOM
+ * @reasoncode IBSCOM_WRONG_ERROR
+ * @userdata1[0:31] HUID of Centaur Target
+ * @userdata1[32:64] SCOM Address
+ * @userdata2 Contents of MBSIBERR0 register
+ * @devdesc Detected error doesn't match the address
+ * we failed on
+ */
+ l_err = new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
+ IBSCOM_DO_IBSCOM,
+ IBSCOM_WRONG_ERROR,
+ TWO_UINT32_TO_UINT64(
+ get_huid(i_target),
+ i_addr),
+ mbsiberr0.data);
+ // this would be a code bug because we got out of sync somehow
+ l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_HIGH );
+ ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target")
+ .addToLog(l_err);
+ ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target);
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR));
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0));
+ ffdc.addToLog(l_err);
+ l_err->collectTrace(IBSCOM_COMP_NAME);
//attempt to clear the error register so future accesses
//will work
- uint64_t zeroData = 0x0;
- readSize = sizeof(uint64_t);
- l_err = deviceOp( DeviceFW::WRITE,
- i_target,
- &zeroData,
- readSize,
- DEVICE_FSISCOM_ADDRESS(MBSIBERR0) );
- if(l_err )
- {
- errlCommit(l_err,IBSCOM_COMP_ID);
- l_err = NULL;
- }
+ err_cleanup(i_target,i_addr);
- //if the MBSIBERR0Q_IB_HOST_ERROR_VALID bit is not set
- // then we have a bus failure
- if( !(mbsiberr0_data & HOST_ERROR_VALID) )
- {
- //Bus is down
- busDown = true;
- }
+ break;
}
- else // >= DD20
- {
- //TODO RTC: 68984: Validate error path on DD2.0 Centaurs
- l_err = doIBScom(DeviceFW::READ,
- i_target,
- &mbsiberr0_data,
- readSize,
- MBSIBERR0,
- true);
- if(l_err != NULL)
- {
- if( IBSCOM_SUE_IN_ERR_PATH == l_err->reasonCode() )
- {
- TRACFCOMP(g_trac_ibscom, ERR_MRK
- "doIBScom: SUE on write detected");
- delete l_err;
- l_err = NULL;
- busDown = true;
- }
- else
- {
- TRACFCOMP(g_trac_ibscom, ERR_MRK"doIBScom: Unexpected error when checking for SUE");
- break;
- }
- }
- } // >= DD20
+
if(busDown)
{
- //TODO RTC: 69115 - call PRD to do FIR analysis, return PRD
- //error instead.
/*@
* @errortype
* @moduleid IBSCOM_DO_IBSCOM
@@ -553,19 +711,25 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
* @devdesc Bus failure when attempting to perform
* IBSCOM operation. IBSCOM disabled.
*/
- l_err =
+ errlHndl_t ib_err =
new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
IBSCOM_DO_IBSCOM,
IBSCOM_BUS_FAILURE,
TWO_UINT32_TO_UINT64(
get_huid(i_target),
i_addr),
- mbsiberr0_data);
+ mbsiberr0.data);
+
+ ib_err->addHwCallout(i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL);
- l_err->addHwCallout(i_target,
- HWAS::SRCI_PRIORITY_HIGH,
- HWAS::NO_DECONFIG,
- HWAS::GARD_NULL);
+ //grab some HW regs via FSISCOM
+ ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target);
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR));
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0));
+ ffdc.addToLog(l_err);
//disable IBSCOM
ScomSwitches l_switches =
@@ -581,6 +745,33 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
// Turn off IBSCOM and turn on FSI SCOM.
i_target->setAttr<ATTR_SCOM_SWITCHES>(l_switches);
}
+
+ //@todo: RTC:92971
+ //There is a potential deadlock if we call PRD here
+ //Look for a better PRD error
+ //errlHndl_t prd_err = ATTN::checkForIplAttentions();
+ errlHndl_t prd_err = NULL;
+ if( prd_err )
+ {
+ TRACFCOMP( g_trac_ibscom, ERR_MRK"Error from checkForIplAttentions : PLID=%X", prd_err->plid() );
+ //connect up the plids
+ ib_err->plid(prd_err->plid());
+ //commit my log as info because PRD's log is better
+ ib_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+ errlCommit(ib_err,IBSCOM_COMP_ID);
+ l_err = prd_err;
+ }
+ else
+ {
+ //my log is the only one
+ l_err = ib_err;
+ }
+
+ l_err->collectTrace(IBSCOM_COMP_NAME);
+
+ //Note-not cleaning up the error status here since
+ // we will not be using IBSCOM again
+
break;
}
else // bus isn't down, some other kind of error
@@ -597,23 +788,33 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
*/
l_err = new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
IBSCOM_DO_IBSCOM,
- IBSCOM_BUS_FAILURE,
+ IBSCOM_PIB_FAILURE,
TWO_UINT32_TO_UINT64(
get_huid(i_target),
i_addr),
- mbsiberr0_data);
+ mbsiberr0.data);
//Add this target to the FFDC
- ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_err);
-
- uint64_t pib_code =
- (mbsiberr0_data & PIB_ERROR_STATUS_MASK) >> PIB_ERROR_SHIFT;
+ ERRORLOG::ErrlUserDetailsTarget(i_target,"IBSCOM Target")
+ .addToLog(l_err);
//add callouts based on the PIB error
PIB::addFruCallouts( i_target,
- pib_code,
+ mbsiberr0.piberr,
l_err );
+ //grab some HW regs via FSISCOM
+ ERRORLOG::ErrlUserDetailsLogRegister ffdc(i_target);
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBS_FIR));
+ ffdc.addData(DEVICE_FSISCOM_ADDRESS(MBSIBERR0));
+ ffdc.addToLog(l_err);
+
+ l_err->collectTrace(IBSCOM_COMP_NAME);
+
+ //attempt to clear the error register so future accesses
+ //will work
+ err_cleanup(i_target,i_addr);
+
break;
}
}
@@ -659,4 +860,70 @@ errlHndl_t ibscomPerformOp(DeviceFW::OperationType i_opType,
return l_err;
}
+
+/**
+ * @brief Enable or disable Inband SCOMs on all capable chips; i_disable==IBSCOM_DISABLE selects FSI SCOM on each qualifying membuf, any other value selects Inband SCOM
+ */
+void enableInbandScoms( bool i_disable )
+{
+ TARGETING::TargetHandleList membufChips;
+ TARGETING::getAllChips(membufChips, TYPE_MEMBUF, true);
+
+ mutex_t* l_mutex = NULL;
+
+ TARGETING::Target * sys = NULL;
+ TARGETING::targetService().getTopLevelTarget(sys); // NOTE(review): sys is dereferenced just below without a NULL check
+
+ uint8_t l_override =
+ sys->getAttr<TARGETING::ATTR_IBSCOM_ENABLE_OVERRIDE>();
+ TRACFCOMP(g_trac_ibscom,"IBSCOM_ENABLE_OVERRIDE=%d",l_override);
+
+ for(uint32_t i=0; i<membufChips.size(); i++)
+ {
+ TARGETING::Target* mb = membufChips[i];
+
+ // If the membuf chip supports IBSCOM AND..
+ // (Chip is >=DD20 OR IBSCOM Override is set)
+ if( (mb->getAttr<ATTR_PRIMARY_CAPABILITIES>().supportsInbandScom)
+ &&
+ ( (mb->getAttr<TARGETING::ATTR_EC>() >= 0x20) ||
+ (l_override != 0) )
+ ) // chips failing this test are left untouched, for disable requests as well as enable requests
+ {
+ //don't mess with attributes without the mutex (just to be safe)
+ l_mutex = mb->getHbMutexAttr<TARGETING::ATTR_IBSCOM_MUTEX>();
+ mutex_lock(l_mutex);
+
+ ScomSwitches l_switches = mb->getAttr<ATTR_SCOM_SWITCHES>();
+
+ uint8_t ib_new = 1; // default request: inband on
+ uint8_t fsi_new = 0; // default request: FSI off
+ if( i_disable == IBSCOM_DISABLE )
+ {
+ ib_new = 0;
+ fsi_new = 1;
+ }
+
+ // If Inband Scom enablement changed
+ if ((l_switches.useInbandScom != ib_new) ||
+ (l_switches.useFsiScom != fsi_new))
+ {
+ l_switches.useFsiScom = fsi_new;
+ l_switches.useInbandScom = ib_new;
+
+ // Modify attribute
+ membufChips[i]->setAttr<ATTR_SCOM_SWITCHES>(l_switches); // membufChips[i] is the same target as mb
+
+ TRACFCOMP(g_trac_ibscom,
+ "IBSCOM=%d on target HUID %.8X",
+ ib_new,
+ TARGETING::get_huid(mb));
+ }
+
+ mutex_unlock(l_mutex);
+ }
+ }
+}
+
+
} // end namespace
diff --git a/src/usr/ibscom/ibscom.H b/src/usr/ibscom/ibscom.H
index 3ba3c34f1..f1742b3e5 100644
--- a/src/usr/ibscom/ibscom.H
+++ b/src/usr/ibscom/ibscom.H
@@ -67,6 +67,24 @@ errlHndl_t ibscomPerformOp(DeviceFW::OperationType i_opType,
int64_t i_accessType,
va_list i_args);
+/**
+ * Bit definition for MBSIBERR0; note the type name is spelled with the letter 'O' (MBSIBERRO) while the register name ends in the digit '0'. Field widths add up to 64 bits and overlay 'data'; the bit positions presume bit-fields are allocated starting from the most significant bit - TODO confirm for the target compiler
+ */
+struct MBSIBERRO_Reg_t
+{
+ union
+ {
+ uint64_t data; // raw 64-bit register image
+ struct
+ {
+ uint64_t addr:32; //MBSIBERR0Q_IB_HOST_ADDRESS
+ uint64_t errvalid:1; //MBSIBERR0Q_IB_HOST_ERROR_VALID
+ uint64_t piberr:3; //MBSIBERR0Q_IB_HOST_ERROR_STATUS
+ uint64_t iswrite:1; //MBSIBERR0Q_IB_HOST_WRITE_NOT_READ
+ uint64_t reserved:27; // remaining bits, no meaning assigned here
+ };
+ };
+};
};
diff --git a/src/usr/ibscom/test/ibscomtest.H b/src/usr/ibscom/test/ibscomtest.H
index 44e2d7e66..0e623c771 100644
--- a/src/usr/ibscom/test/ibscomtest.H
+++ b/src/usr/ibscom/test/ibscomtest.H
@@ -35,6 +35,7 @@
#include <devicefw/userif.H>
#include <ibscom/ibscomreasoncodes.H>
#include <devicefw/driverif.H>
+#include <sys/time.h>
extern trace_desc_t* g_trac_ibscom;
@@ -86,26 +87,36 @@ class IBscomTest: public CxxTest::TestSuite
return;
}
l_testTarget = *(centaur_list.begin());
+ TRACFCOMP(g_trac_ibscom,"test_IBscom> Using target %.8X", TARGETING::get_huid(l_testTarget));
+
+ ScomSwitches l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+ if( !l_switches.useInbandScom )
+ {
+ TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom because ibscom is not enabled");
+ return;
+ }
TRACDCOMP(g_trac_ibscom,
- "IBscomTest::test_IBscom> Read orignal data from Centaur");
+ "IBscomTest::test_IBscom> Read original data from Centaur");
- const uint64_t addrs[] = {0x0201164F, 0x0301069A};
+ const uint64_t addrs[] = {0x02010803/*0:26*/, 0x03010403/*0:21*/};
uint64_t orig_data[2] = {0};
- uint64_t new_data[] = {0x1234567ABABABA00, 0xFEEDB0B0FEDCBA00};
+ uint64_t new_data1[] = {0x123456E000000000, 0xFEEDB00000000000};
uint64_t read_data[2] = {0};
uint64_t read_data_fsi[2] = {0};
size_t op_size = sizeof(uint64_t);
+ const uint64_t junk = 0x1122334455667788;
- //Save of initial register content
+ //Save off initial register content via FSI
for(uint32_t i=0; i<2; i++)
{
op_size = sizeof(uint64_t);
- l_err = deviceRead( l_testTarget,
- &orig_data[i],
- op_size,
- DEVICE_SCOM_ADDRESS(addrs[i]) );
+ l_err = deviceOp( DeviceFW::READ,
+ l_testTarget,
+ &orig_data[i],
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(addrs[i]) );
if( l_err )
{
TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Orig Read: Error from device : addr=0x%X, RC=%X",
@@ -116,17 +127,54 @@ class IBscomTest: public CxxTest::TestSuite
}
total++;
+ // OR in the original data so we don't clear mask bits
+ new_data1[i] |= orig_data[i];
+ }
+
+ //Read the data with IBSCOM
+ for(uint32_t i=0; i<2; i++)
+ {
+ TRACDCOMP(g_trac_ibscom,
+ "IBscomTest::test_IBscom> Read data");
+
+ //reset size
+ op_size = sizeof(uint64_t);
+ read_data[i] = junk;
+ l_err = deviceOp( DeviceFW::READ,
+ l_testTarget,
+ &read_data[i],
+ op_size,
+ DEVICE_IBSCOM_ADDRESS(addrs[i]) );
+ if( l_err )
+ {
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
+ addrs[i], l_err->reasonCode() );
+ TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" );
+ fails++;
+ errlCommit(l_err,IBSCOM_COMP_ID);
+ }
+
+ if(orig_data[i] != read_data[i])
+ {
+ TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read check." );
+
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X",
+ addrs[i], new_data1[i], read_data[i]);
+ fails++;
+ }
+
+ total++;
}
- //Write in some new data
+ //Write in some new data with IBSCOM
for(uint32_t i=0; i<2; i++)
{
- TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data[i]);
+ TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data1[i]);
op_size = sizeof(uint64_t);
l_err = deviceOp( DeviceFW::WRITE,
l_testTarget,
- &new_data[i],
+ &new_data1[i],
op_size,
DEVICE_IBSCOM_ADDRESS(addrs[i]) );
if( l_err )
@@ -137,11 +185,12 @@ class IBscomTest: public CxxTest::TestSuite
fails++;
errlCommit(l_err,IBSCOM_COMP_ID);
}
+ nanosleep( 0, 1000000 ); //sleep for 1ms
total++;
}
- //Read the data back with IBSCOM
+ //Read the data back with FSISCOM
for(uint32_t i=0; i<2; i++)
{
TRACDCOMP(g_trac_ibscom,
@@ -151,31 +200,56 @@ class IBscomTest: public CxxTest::TestSuite
op_size = sizeof(uint64_t);
l_err = deviceOp( DeviceFW::READ,
l_testTarget,
- &read_data[i],
+ &read_data_fsi[i],
op_size,
- DEVICE_IBSCOM_ADDRESS(addrs[i]) );
+ DEVICE_FSISCOM_ADDRESS(addrs[i]) );
if( l_err )
{
- TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
+ TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
addrs[i], l_err->reasonCode() );
- TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" );
+ TS_FAIL( "test_IBscom> ERROR : Error log from FSI Read" );
fails++;
errlCommit(l_err,IBSCOM_COMP_ID);
}
- if(new_data[i] != read_data[i])
+ if(new_data1[i] != read_data_fsi[i])
{
- TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read-back check." );
+ TS_FAIL( "test_IBscom> ERROR : Data miss-match on FSI read-back check." );
- TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X",
- addrs[i], new_data[i], read_data[i]);
+ TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, FSI read data=0x%.16X",
+ addrs[i], new_data1[i], read_data_fsi[i]);
+ fails++;
+ }
+ total++;
+ }
+
+ //Write in some new data with FSISCOM
+ uint64_t new_data2[] = {0xA5A5A50000000000/*0:26*/,
+ 0x1122000000000000/*0:21*/};
+ for(uint32_t i=0; i<2; i++)
+ {
+ TRACDCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write new pattern: 0x%.16X", new_data2[i]);
+
+ op_size = sizeof(uint64_t);
+ l_err = deviceOp( DeviceFW::WRITE,
+ l_testTarget,
+ &new_data2[i],
+ op_size,
+ DEVICE_FSISCOM_ADDRESS(addrs[i]) );
+ if( l_err )
+ {
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Write: Error from device : addr=0x%X, RC=%X",
+ addrs[i], l_err->reasonCode() );
+ TS_FAIL( "ScomTest::test_IBscom> ERROR : Error log from FSI Write" );
fails++;
+ errlCommit(l_err,IBSCOM_COMP_ID);
}
total++;
}
- //Read the data back with FSISCOM
+
+ //Read the data back with IBSCOM
for(uint32_t i=0; i<2; i++)
{
TRACDCOMP(g_trac_ibscom,
@@ -185,26 +259,27 @@ class IBscomTest: public CxxTest::TestSuite
op_size = sizeof(uint64_t);
l_err = deviceOp( DeviceFW::READ,
l_testTarget,
- &read_data_fsi[i],
+ &read_data[i],
op_size,
- DEVICE_FSISCOM_ADDRESS(addrs[i]) );
+ DEVICE_IBSCOM_ADDRESS(addrs[i]) );
if( l_err )
{
- TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom> Read: Error from device : addr=0x%X, RC=%X",
addrs[i], l_err->reasonCode() );
- TS_FAIL( "test_IBscom> ERROR : Error log from FSI Read" );
+ TS_FAIL( "test_IBscom> ERROR : Error log from IBSCOM read" );
fails++;
errlCommit(l_err,IBSCOM_COMP_ID);
}
- if(new_data[i] != read_data_fsi[i])
+ if(new_data2[i] != read_data[i])
{
- TS_FAIL( "test_IBscom> ERROR : Data miss-match on FSI read-back check." );
+ TS_FAIL( "test_IBscom> ERROR : Data miss-match on IBSCOM read-back check." );
- TRACFCOMP(g_trac_ibscom, "IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, FSI read data=0x%.16X",
- addrs[i], new_data[i], read_data_fsi[i]);
+ TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom>ERROR: addr=0x%.8x: Write data=0x%.16X, IBSCOM read data=0x%.16X",
+ addrs[i], new_data1[i], read_data[i]);
fails++;
}
+
total++;
}
@@ -237,7 +312,146 @@ class IBscomTest: public CxxTest::TestSuite
//TODO RTC: 72594: Add error path test cases when simics support
//is available
+ /**
+  * @brief IBSCOM error-path test: operations on bad addresses must
+  *        return an error log without disabling inband scom.
+  *
+  * Sequence:
+  *   1. IBSCOM write to a bad address, expect an error log
+  *   2. Check ATTR_SCOM_SWITCHES.useInbandScom is still set
+  *   3. IBSCOM read from a bad address, expect an error log
+  *   4. Check ATTR_SCOM_SWITCHES.useInbandScom is still set
+  *   5. IBSCOM read from a good address, expect no error
+  *
+  * @note The test currently returns right after its first trace
+  *       ("Simics is broken"), so the body below is compiled but
+  *       never executed until that early return is removed.
+  */
+ void test_IBscom_error(void)
+ {
+     TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom_error because Simics is broken");
+     return;
+
+     uint64_t fails = 0;
+     uint64_t total = 0;
+     errlHndl_t l_err = NULL;
+
+     TARGETING::Target* l_testTarget = NULL;
+
+     // Target: Find a Centaur on the Master processor
+     TARGETING::Target* l_procTarget = NULL;
+     TARGETING::targetService().masterProcChipTargetHandle(l_procTarget);
+     assert(l_procTarget != NULL);
+
+     TARGETING::PredicateCTM l_cent(TARGETING::CLASS_CHIP,
+                                    TARGETING::TYPE_MEMBUF,
+                                    TARGETING::MODEL_NA);
+     TARGETING::PredicatePostfixExpr cent_query;
+     cent_query.push(&l_cent);
+
+     TARGETING::TargetHandleList centaur_list;
+     TARGETING::targetService().
+         getAssociated(centaur_list,
+                       l_procTarget,
+                       TARGETING::TargetService::CHILD_BY_AFFINITY,
+                       TARGETING::TargetService::ALL,
+                       &cent_query);
+
+     if( centaur_list.size() < 1 )
+     {
+         TS_FAIL( "test_IBscom_error> ERROR : Unable to find a Centaur chip" );
+         return;
+     }
+     l_testTarget = *(centaur_list.begin());
+
+     // Nothing to test if inband scom is not in use on this target
+     ScomSwitches l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+     if( !l_switches.useInbandScom )
+     {
+         TRACFCOMP(g_trac_ibscom,"Skipping test_IBscom_error because ibscom is not enabled");
+         return;
+     }
+
+     TRACDCOMP(g_trac_ibscom, "IBscomTest::test_IBscom_error> Read orignal data from Centaur");
+
+     uint64_t new_data = 0x1234567ABABABA00;
+     uint64_t read_data = 0;
+     size_t op_size = sizeof(uint64_t);
+
+     // Address used for the final sanity read; named once so the trace
+     // and the device operation cannot drift apart
+     const uint64_t good_addr = 0x02010803;
+
+     //Write a bad address
+     uint64_t bad_addr = 0x02123456;
+     TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Write bad address 0x%.16X", bad_addr);
+     // op_size is re-set before every deviceOp call since it is passed
+     // as a modifiable argument
+     op_size = sizeof(uint64_t);
+     l_err = deviceOp( DeviceFW::WRITE,
+                       l_testTarget,
+                       &new_data,
+                       op_size,
+                       DEVICE_IBSCOM_ADDRESS(bad_addr) );
+     if( !l_err )
+     {
+         TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> No error on bad address write" );
+         TS_FAIL( "ScomTest::test_IBscom_error> No error on bad address write" );
+         fails++;
+     }
+     else
+     {
+         // The error is the expected outcome, discard it without committing
+         delete l_err;
+     }
+     total++;
+     nanosleep( 0, 1000000 ); //sleep for 1ms
+
+     //Verify ibscom is still enabled, i.e. we didn't think it was a bus fail
+     l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+     total++;
+     if( !l_switches.useInbandScom )
+     {
+         TS_FAIL( "ScomTest::test_IBscom_error> IBSCOM was wrongly disabled after bad address write" );
+         fails++;
+     }
+
+     //Read a bad address
+     bad_addr = 0x02876543;
+     TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Read bad address 0x%.16X", bad_addr);
+     op_size = sizeof(uint64_t);
+     l_err = deviceOp( DeviceFW::READ,
+                       l_testTarget,
+                       &read_data,
+                       op_size,
+                       DEVICE_IBSCOM_ADDRESS(bad_addr) );
+     if( !l_err )
+     {
+         TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> No error on bad address read" );
+         TS_FAIL( "ScomTest::test_IBscom_error> No error on bad address read" );
+         fails++;
+     }
+     else
+     {
+         // The error is the expected outcome, discard it without committing
+         delete l_err;
+     }
+     total++;
+
+     //Verify ibscom is still enabled, i.e. we didn't think it was a bus fail
+     l_switches = l_testTarget->getAttr<ATTR_SCOM_SWITCHES>();
+     total++;
+     if( !l_switches.useInbandScom )
+     {
+         TS_FAIL( "ScomTest::test_IBscom_error> IBSCOM was wrongly disabled after bad address read" );
+         fails++;
+     }
+
+     //Read a good address to prove things still work
+     TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Read good address 0x%.16X", good_addr);
+     op_size = sizeof(uint64_t);
+     l_err = deviceOp( DeviceFW::READ,
+                       l_testTarget,
+                       &read_data,
+                       op_size,
+                       DEVICE_IBSCOM_ADDRESS(good_addr) );
+     if( l_err )
+     {
+         TRACFCOMP(g_trac_ibscom,"IBscomTest::test_IBscom_error> Error on read after fail" );
+         TS_FAIL( "ScomTest::test_IBscom_error> Error on read after fail" );
+         errlCommit(l_err,IBSCOM_COMP_ID);
+         fails++;
+     }
+     total++;
+
+     // Only claim success when no check above recorded a failure
+     if( 0 == fails )
+     {
+         TS_TRACE("test_IBscom_error runs successfully!");
+     }
+     TRACFCOMP(g_trac_ibscom,
+               "IBscomTest::test_IBscom_error> %d/%d fails",
+               fails, total );
+
+     //TS_FAIL("FORCING ERROR TO STOP IPL");
+
+     return;
+ }
};
#endif
diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C
index 7b5ecaf27..b15f72406 100644
--- a/src/usr/scom/scom.C
+++ b/src/usr/scom/scom.C
@@ -5,7 +5,7 @@
/* */
/* IBM CONFIDENTIAL */
/* */
-/* COPYRIGHT International Business Machines Corp. 2011,2012 */
+/* COPYRIGHT International Business Machines Corp. 2011,2013 */
/* */
/* p1 */
/* */
@@ -36,12 +36,13 @@
#include <errl/errlmanager.H>
#include "scom.H"
#include <scom/scomreasoncodes.H>
+#include <ibscom/ibscomreasoncodes.H>
#include <sys/time.h>
// Trace definition
trace_desc_t* g_trac_scom = NULL;
-TRAC_INIT(&g_trac_scom, "SCOM", KILOBYTE, TRACE::BUFFER_SLOW); //1K
+TRAC_INIT(&g_trac_scom, SCOM_COMP_NAME, KILOBYTE, TRACE::BUFFER_SLOW); //1K
namespace SCOM
@@ -102,7 +103,6 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
bool l_indScomError = false;
uint64_t temp_io_buffer = 0;
- //@todo - determine hwhat an appropriate timeout value
enum { MAX_INDSCOM_TIMEOUT_NS = 100000 }; //=.1ms
// If the indirect scom bit is 0, then doing a regular scom
@@ -213,8 +213,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
break;
}
- //TODO tmp remove for VPO, need better polling strategy -- RTC43738
- //nanosleep( 0, 10000 ); //sleep for 10,000 ns
+ nanosleep( 0, 10000 ); //sleep for 10,000 ns
elapsed_indScom_time_ns += 10000;
}while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS);
@@ -336,8 +335,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
}
- //TODO tmp remove for VPO, need better polling strategy -- RTC43738
- //nanosleep( 0, 10000 ); //sleep for 10,000 ns
+ nanosleep( 0, 10000 ); //sleep for 10,000 ns
elapsed_indScom_time_ns += 10000;
}while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS);
@@ -464,6 +462,19 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
}while(0);
+ //Look for special retry codes
+ if( l_err
+ && (0xFFFFFFFF != i_accessType)
+ && (l_err->reasonCode() == IBSCOM::IBSCOM_RETRY_DUE_TO_ERROR) )
+ {
+ delete l_err;
+ TRACFCOMP(g_trac_scom, "Forcing retry of Scom to %.16X on %.8X", i_addr, TARGETING::get_huid(i_target));
+ // use the unused i_accessType parameter to avoid an infinite recursion
+ int64_t accessType_flag = 0xFFFFFFFF;
+ l_err = doScomOp( i_opType, i_target, io_buffer,
+ io_buflen, accessType_flag, i_addr );
+ }
+
return l_err;
}
diff --git a/src/usr/testcore/rtloader/loader.H b/src/usr/testcore/rtloader/loader.H
index ea36f1126..03e8e5f39 100644
--- a/src/usr/testcore/rtloader/loader.H
+++ b/src/usr/testcore/rtloader/loader.H
@@ -32,6 +32,7 @@
#include <errl/errlmanager.H>
#include <util/utillidmgr.H>
#include <map>
+#include <sys/time.h>
#include <runtime/interface.h>
#include <vpd/vpd_if.H>
@@ -105,6 +106,7 @@ class RuntimeLoaderTest : public CxxTest::TestSuite
intf->malloc = malloc;
intf->free = free;
intf->realloc = realloc;
+ intf->sleep = nanosleep;
intf->assert = rt_assert;
intf->sendErrorLog = rt_logErr;
intf->scom_read = rt_scom_read;
diff --git a/src/usr/xscom/piberror.C b/src/usr/xscom/piberror.C
index 366fd2579..0ffe42a2b 100644
--- a/src/usr/xscom/piberror.C
+++ b/src/usr/xscom/piberror.C
@@ -53,6 +53,15 @@ void addFruCallouts(TARGETING::Target* i_target,
switch (i_pibErrStatus)
{
case PIB::PIB_CHIPLET_OFFLINE:
+ //Offline should just be a code bug, but it seems that there are
+ // cases where bad hardware can also cause this problem
+ io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ io_errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL );
+ break;
case PIB::PIB_PARTIAL_GOOD:
case PIB::PIB_INVALID_ADDRESS:
io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
OpenPOWER on IntegriCloud