summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dan Crowell <dcrowell@us.ibm.com> 2014-02-28 13:14:36 -0600
committer A. Patrick Williams III <iawillia@us.ibm.com> 2014-03-03 12:14:05 -0600
commit f3c513e40abb822d78c0a83d7bf874d30eb733a1 (patch)
tree a2e316a63c5f04b9a84bdd4ad7a72a62faf4a3c0
parent 4c1eb65cfcec7141d464ba12d4d39dae638c4ef9 (diff)
download talos-hostboot-f3c513e40abb822d78c0a83d7bf874d30eb733a1.tar.gz
talos-hostboot-f3c513e40abb822d78c0a83d7bf874d30eb733a1.zip
Improve FSI PIB2OPB Error Recovery
Modified which error bits are checked in the PIB2OPB status and changed a few error reset functions.

Change-Id: I27676947983f0b66c940d68bbd5f134912749ad9
CQ: SW248395
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/9238
Tested-by: Jenkins Server
Reviewed-by: Michael Baiocchi <baiocchi@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
-rwxr-xr-x src/build/tools/listdeps.pl 3
-rw-r--r-- src/include/usr/fsi/fsiif.H 14
-rw-r--r-- src/include/usr/hwas/hwasplatreasoncodes.H 2
-rw-r--r-- src/usr/fsi/fsidd.C 283
-rw-r--r-- src/usr/fsi/fsidd.H 22
-rw-r--r-- src/usr/fsiscom/fsiscom.C 46
-rw-r--r-- src/usr/hwas/hwasPlat.C 47
-rw-r--r-- src/usr/hwpf/hwp/activate_powerbus/activate_powerbus.C 15
-rw-r--r-- src/usr/pore/poreve/porevesrc/pib2cfam.C 6
9 files changed, 339 insertions, 99 deletions
diff --git a/src/build/tools/listdeps.pl b/src/build/tools/listdeps.pl
index 124c9e2a4..35bacd5b8 100755
--- a/src/build/tools/listdeps.pl
+++ b/src/build/tools/listdeps.pl
@@ -202,6 +202,9 @@ my %resident_modules = (
"libi2c.so" => '1',
"libutil.so" => '1',
"libibscom.so" => '1',
+ "libfsiscom.so" => '1',
+ "libfsi.so" => '1',
+ "libscan.so" => '1',
);
diff --git a/src/include/usr/fsi/fsiif.H b/src/include/usr/fsi/fsiif.H
index 8d1b5f15b..4aa67f4fb 100644
--- a/src/include/usr/fsi/fsiif.H
+++ b/src/include/usr/fsi/fsiif.H
@@ -5,7 +5,7 @@
/* */
/* IBM CONFIDENTIAL */
/* */
-/* COPYRIGHT International Business Machines Corp. 2011,2013 */
+/* COPYRIGHT International Business Machines Corp. 2011,2014 */
/* */
/* p1 */
/* */
@@ -71,6 +71,7 @@ enum fsiFFDCType_t
FFDC_READWRITE_FAIL = 1,
FFDC_PIB_FAIL = 2,
FFDC_OPB_FAIL = 3,
+ FFDC_OPB_FAIL_SLAVE = 4,
};
/**
@@ -86,7 +87,16 @@ enum fsiFFDCType_t
*/
void getFsiFFDC( fsiFFDCType_t i_ffdc_type,
errlHndl_t &io_log,
- TARGETING::Target* i_target);
+ TARGETING::Target* i_target );
+
+/**
+ * @brief Cleanup the FSI PIB2OPB logic on the procs
+ *
+ * @param[in] i_target Proc Chip Target to reset
+ *
+ * @return errlHndl_t NULL on success
+ */
+errlHndl_t resetPib2Opb( TARGETING::Target* i_target );
/**
diff --git a/src/include/usr/hwas/hwasplatreasoncodes.H b/src/include/usr/hwas/hwasplatreasoncodes.H
index fc14e2a0d..48c8c41f2 100644
--- a/src/include/usr/hwas/hwasplatreasoncodes.H
+++ b/src/include/usr/hwas/hwasplatreasoncodes.H
@@ -35,6 +35,7 @@ namespace HWAS
MOD_HOST_DISCOVER_TARGETS = 0x80,
MOD_HOST_GARD = 0x81,
MOD_PLAT_DECONFIG_GARD = 0x82,
+ MOD_PLAT_READIDEC = 0x83,
};
enum HwasPlatReasonCode
@@ -44,6 +45,7 @@ namespace HWAS
RC_TOP_LEVEL_TARGET_NULL = HWAS_COMP_ID | 0x80,
RC_TARGET_NOT_GARDABLE = HWAS_COMP_ID | 0x81,
RC_GARD_REPOSITORY_FULL = HWAS_COMP_ID | 0x82,
+ RC_BAD_CHIPID = HWAS_COMP_ID | 0x83,
};
};
diff --git a/src/usr/fsi/fsidd.C b/src/usr/fsi/fsidd.C
index c45b4fbf9..a12ca306e 100644
--- a/src/usr/fsi/fsidd.C
+++ b/src/usr/fsi/fsidd.C
@@ -296,6 +296,14 @@ void getFsiFFDC(FSI::fsiFFDCType_t i_ffdc_type, errlHndl_t &i_log,
}
}
+/**
+ * @brief Cleanup the FSI PIB2OPB logic on the procs
+ */
+errlHndl_t resetPib2Opb( TARGETING::Target* i_target )
+{
+ return Singleton<FsiDD>::instance().resetPib2Opb( i_target );
+}
+
}; //end FSI namespace
@@ -384,12 +392,6 @@ errlHndl_t FsiDD::write(TARGETING::Target* i_target,
return l_err;
}
-
-
-/********************
- Internal Methods
- ********************/
-
/**
* @brief Initialize the FSI hardware
*/
@@ -492,6 +494,10 @@ errlHndl_t FsiDD::initializeHardware()
++t_itr;
}
+ // Cleanup any initial error states
+ l_err = resetPib2Opb( iv_master );
+ if( l_err ) { break; }
+
// setup the local master control regs for the MFSI
l_err = initMasterControl(iv_master,TARGETING::FSI_MASTER_TYPE_MFSI);
if( l_err )
@@ -645,26 +651,34 @@ void FsiDD::getFsiFFDC(FSI::fsiFFDCType_t i_ffdc_type,
{
errlHndl_t tmp_err = NULL;
+ // Use this call to find the OPB Master to read
+ FsiAddrInfo_t addr_info( i_target, 0x12345678 );
+ tmp_err = genFullFsiAddr( addr_info );
+ if( tmp_err )
+ {
+ delete tmp_err;
+ return;
+ }
+
// Figure out which control regs to use for FFDC regs
- FsiChipInfo_t fsi_info = getFsiInfo( i_target );
- uint64_t ctl_reg = getControlReg(fsi_info.type);
+ uint64_t ctl_reg = getControlReg(addr_info.accessInfo.type);
// Add data to error log where possible
uint32_t data = 0;
- ERRORLOG::ErrlUserDetailsLogRegister l_eud_fsiT(i_target);
+ ERRORLOG::ErrlUserDetailsLogRegister l_eud_fsiT(addr_info.opbTarg);
uint64_t dump_regs[] = {
+ ctl_reg|FSI_MATRB0_1D8,
+ ctl_reg|FSI_MDTRB0_1DC,
ctl_reg|FSI_MESRB0_1D0,
ctl_reg|FSI_MAESP0_050,
ctl_reg|FSI_MAEB_070,
ctl_reg|FSI_MSCSB0_1D4,
- ctl_reg|FSI_MATRB0_1D8,
- ctl_reg|FSI_MDTRB0_1DC
};
for( size_t x=0; x<(sizeof(dump_regs)/sizeof(dump_regs[0])); x++ )
{
- tmp_err = read( dump_regs[x], &data );
+ tmp_err = read( addr_info.opbTarg, dump_regs[x], &data );
if( tmp_err )
{
delete tmp_err;
@@ -681,7 +695,7 @@ void FsiDD::getFsiFFDC(FSI::fsiFFDCType_t i_ffdc_type,
for( size_t p = 0; p < 8; p++ )
{
uint32_t addr1 = ctl_reg|(FSI_MSTAP0_0D0+p*0x4);
- tmp_err = read( addr1, &data );
+ tmp_err = read( addr_info.opbTarg, addr1, &data );
if( tmp_err )
{
delete tmp_err;
@@ -701,6 +715,8 @@ void FsiDD::getFsiFFDC(FSI::fsiFFDCType_t i_ffdc_type,
else if( FSI::FFDC_PIB_FAIL == i_ffdc_type )
{
errlHndl_t tmp_err = NULL;
+ FsiChipInfo_t fsi_info = getFsiInfo( i_target );
+
ERRORLOG::ErrlUserDetailsLogRegister regdata(iv_master);
regdata.addData(DEVICE_XSCOM_ADDRESS(0x00020001ull));
regdata.addToLog(io_log);
@@ -718,7 +734,7 @@ void FsiDD::getFsiFFDC(FSI::fsiFFDCType_t i_ffdc_type,
uint32_t databuf = 32;
for( size_t x=0; x<(sizeof(dump_regs)/sizeof(dump_regs[0])); x++ )
{
- tmp_err = read( i_target, dump_regs[x], &databuf );
+ tmp_err = read( fsi_info.master, dump_regs[x], &databuf );
if( tmp_err )
{
delete tmp_err;
@@ -744,6 +760,10 @@ void FsiDD::getFsiFFDC(FSI::fsiFFDCType_t i_ffdc_type,
{
// Read some error regs from scom
ERRORLOG::ErrlUserDetailsLogRegister l_scom_data(i_target);
+ // What I thought I wrote last...
+ l_scom_data.addDataBuffer(&iv_lastOpbCmd,
+ sizeof(iv_lastOpbCmd),
+ DEVICE_XSCOM_ADDRESS(0xFF00000000020000ull));
// OPB Regs
l_scom_data.addData(DEVICE_XSCOM_ADDRESS(0x00020000ull));
l_scom_data.addData(DEVICE_XSCOM_ADDRESS(0x00020001ull));
@@ -756,14 +776,81 @@ void FsiDD::getFsiFFDC(FSI::fsiFFDCType_t i_ffdc_type,
l_scom_data.addData(DEVICE_XSCOM_ADDRESS(0x0002000Aull));
// Other suggestions from Markus Cebulla
l_scom_data.addData(DEVICE_XSCOM_ADDRESS(0x0005001Cull));//SBE_VITAL
- l_scom_data.addData(DEVICE_XSCOM_ADDRESS(0x0005001Cull));//SBE_VITAL
+ l_scom_data.addData(DEVICE_XSCOM_ADDRESS(0x00010005ull));//Secure reg
l_scom_data.addToLog(io_log);
}
+ else if( FSI::FFDC_OPB_FAIL_SLAVE == i_ffdc_type )
+ {
+ errlHndl_t tmp_err = NULL;
+ // Find the OPB Master and then collect FFDC_OPB_FAIL
+ FsiAddrInfo_t addr_info( i_target, 0x12345678 );
+ tmp_err = genFullFsiAddr( addr_info );
+ if( tmp_err )
+ {
+ delete tmp_err;
+ }
+ else
+ {
+ getFsiFFDC( FSI::FFDC_OPB_FAIL,
+ io_log,
+ addr_info.opbTarg );
+ }
+ }
return;
}
+/**
+ * @brief Cleanup the FSI PIB2OPB logic on the procs
+ *
+ * @param[in] i_target Proc Chip Target to reset
+ *
+ * @return errlHndl_t NULL on success
+ */
+errlHndl_t FsiDD::resetPib2Opb( TARGETING::Target* i_target )
+{
+ errlHndl_t errhdl = NULL;
+ TRACFCOMP(g_trac_fsi, "FsiDD::resetPib2Opb(%.8X)>", TARGETING::get_huid(i_target) );
+
+ do {
+ // Clear out OPB error
+ uint64_t scom_data = 0;
+ size_t scom_size = sizeof(scom_data);
+
+ uint64_t opbaddr = FSI2OPB_OFFSET_0 | OPB_REG_RES;
+ scom_data = 0x8000000000000000; //0=Unit Reset
+ errhdl = deviceOp( DeviceFW::WRITE,
+ i_target,
+ &scom_data,
+ scom_size,
+ DEVICE_XSCOM_ADDRESS(opbaddr) );
+ if( errhdl ) { break; }
+
+ opbaddr = FSI2OPB_OFFSET_0 | OPB_REG_STAT;
+ errhdl = deviceOp( DeviceFW::WRITE,
+ i_target,
+ &scom_data,
+ scom_size,
+ DEVICE_XSCOM_ADDRESS(opbaddr) );
+ if( errhdl ) { break; }
+
+ // Check if we have any errors left
+ opbaddr = FSI2OPB_OFFSET_0 | OPB_REG_STAT;
+ scom_data = 0;
+ errhdl = deviceOp( DeviceFW::READ,
+ i_target,
+ &scom_data,
+ scom_size,
+ DEVICE_XSCOM_ADDRESS(opbaddr) );
+ if( errhdl ) { break; }
+ TRACFCOMP( g_trac_fsi, "PIB2OPB Status (%.8X->%.8X) after cleanup = %.16X", TARGETING::get_huid(i_target), opbaddr, scom_data );
+ } while(0);
+
+ return errhdl;
+}
+
+
/********************
Internal Methods
********************/
@@ -775,6 +862,7 @@ FsiDD::FsiDD()
:iv_master(NULL)
,iv_ffdcTask(0)
,iv_opbErrorMask(OPB_STAT_ERR_ANY)
+,iv_lastOpbCmd(0)
{
TRACFCOMP(g_trac_fsi, "FsiDD::FsiDD()>");
@@ -852,6 +940,7 @@ errlHndl_t FsiDD::read(FsiAddrInfo_t& i_addrInfo,
errlHndl_t l_err = NULL;
bool need_unlock = false;
mutex_t* l_mutex = NULL;
+ *o_buffer = 0xDEADBEEF;
do {
// setup the OPB command register
@@ -872,11 +961,20 @@ errlHndl_t FsiDD::read(FsiAddrInfo_t& i_addrInfo,
need_unlock = true;
}
+ // make sure there are no other ops running before we start
+ l_err = pollForComplete( i_addrInfo, NULL );
+ if( l_err )
+ {
+ TRACFCOMP(g_trac_fsi, "FsiDD::read> FSI Errors before doing read operation : %.8X->%.8X", TARGETING::get_huid(i_addrInfo.fsiTarg), i_addrInfo.relAddr );
+ break;
+ }
+
// always read/write 64 bits to SCOM
size_t scom_size = sizeof(uint64_t);
// write the OPB command register to trigger the read
- TRACUCOMP(g_trac_fsi, "FsiDD::read> ScomWRITE : opbaddr=%.16llX, data=%.16llX", opbaddr, fsicmd );
+ iv_lastOpbCmd = fsicmd;
+ TRACUCOMP(g_trac_fsi, "FsiDD::read> ScomWRITE to %.8X: opbaddr=%.16llX, data=%.16llX", TARGETING::get_huid(i_addrInfo.opbTarg), opbaddr, fsicmd );
l_err = deviceOp( DeviceFW::WRITE,
i_addrInfo.opbTarg,
&fsicmd,
@@ -892,6 +990,7 @@ errlHndl_t FsiDD::read(FsiAddrInfo_t& i_addrInfo,
l_err = pollForComplete( i_addrInfo, o_buffer );
if( l_err )
{
+ TRACFCOMP(g_trac_fsi, "FsiDD::read> FSI Errors after doing read operation : %.8X->%.8X", TARGETING::get_huid(i_addrInfo.fsiTarg), i_addrInfo.relAddr );
break;
}
@@ -903,10 +1002,10 @@ errlHndl_t FsiDD::read(FsiAddrInfo_t& i_addrInfo,
}
// atomic section <<
-
- TRACRCOMP(g_trac_fsir, "FSI READ : %.6X = %.8X", i_addrInfo.absAddr, *o_buffer );
} while(0);
+ TRACRCOMP(g_trac_fsir, "FSI READ : %.8X->%.6X = %.8X", TARGETING::get_huid(i_addrInfo.opbTarg), i_addrInfo.absAddr, *o_buffer );
+
if( need_unlock )
{
mutex_unlock(l_mutex);
@@ -928,7 +1027,7 @@ errlHndl_t FsiDD::write(FsiAddrInfo_t& i_addrInfo,
mutex_t* l_mutex = NULL;
do {
- TRACRCOMP(g_trac_fsir, "FSI WRITE : %.6X = %.8X", i_addrInfo.absAddr, *i_buffer );
+ TRACRCOMP(g_trac_fsir, "FSI WRITE : %.8X->%.6X = %.8X", TARGETING::get_huid(i_addrInfo.opbTarg), i_addrInfo.absAddr, *i_buffer );
// pull out the data to write (length has been verified)
uint32_t fsidata = *i_buffer;
@@ -943,8 +1042,8 @@ errlHndl_t FsiDD::write(FsiAddrInfo_t& i_addrInfo,
uint64_t opbaddr = genOpbScomAddr(i_addrInfo,OPB_REG_CMD);
// atomic section >>
- l_mutex
- = (i_addrInfo.opbTarg)->getHbMutexAttr<TARGETING::ATTR_FSI_MASTER_MUTEX>();
+ l_mutex = (i_addrInfo.opbTarg)->
+ getHbMutexAttr<TARGETING::ATTR_FSI_MASTER_MUTEX>();
if( (iv_ffdcTask == 0) // performance hack for typical case
|| (iv_ffdcTask != task_gettid()) )
@@ -953,8 +1052,17 @@ errlHndl_t FsiDD::write(FsiAddrInfo_t& i_addrInfo,
need_unlock = true;
}
+ // make sure there are no other ops running before we start
+ l_err = pollForComplete( i_addrInfo, NULL );
+ if( l_err )
+ {
+ TRACFCOMP(g_trac_fsi, "FsiDD::write> FSI Errors before doing write operation : %.8X->%.8X", TARGETING::get_huid(i_addrInfo.fsiTarg), i_addrInfo.relAddr );
+ break;
+ }
+
// write the OPB command register
- TRACUCOMP(g_trac_fsi, "FsiDD::write> ScomWRITE : opbaddr=%.16llX, data=%.16llX", opbaddr, fsicmd );
+ iv_lastOpbCmd = fsicmd;
+ TRACUCOMP(g_trac_fsi, "FsiDD::write> ScomWRITE to %.8X: opbaddr=%.16llX, data=%.16llX", TARGETING::get_huid(i_addrInfo.opbTarg), opbaddr, fsicmd );
l_err = deviceOp( DeviceFW::WRITE,
i_addrInfo.opbTarg,
&fsicmd,
@@ -970,6 +1078,7 @@ errlHndl_t FsiDD::write(FsiAddrInfo_t& i_addrInfo,
l_err = pollForComplete( i_addrInfo, NULL );
if( l_err )
{
+ TRACFCOMP(g_trac_fsi, "FsiDD::write> FSI Errors after doing write operation : %.8X->%.8X", TARGETING::get_huid(i_addrInfo.fsiTarg), i_addrInfo.relAddr );
break;
}
@@ -1003,7 +1112,19 @@ errlHndl_t FsiDD::handleOpbErrors(FsiAddrInfo_t& i_addrInfo,
{
errlHndl_t l_err = NULL;
- if( (i_opbStatReg & iv_opbErrorMask)
+ // Do not look at error bits for the Master we're not using
+ uint32_t l_opbErrorMask = iv_opbErrorMask;
+ if( i_addrInfo.accessInfo.type == TARGETING::FSI_MASTER_TYPE_CMFSI )
+ {
+ l_opbErrorMask &= ~OPB_STAT_ERR_MFSI;
+ }
+ else
+ {
+ l_opbErrorMask &= ~OPB_STAT_ERR_CMFSI;
+ }
+
+ // Fail if there is a relevant error bit or the op never finished
+ if( (i_opbStatReg & l_opbErrorMask)
|| (i_opbStatReg & OPB_STAT_BUSY) )
{
// If we're already in the middle of handling an error and we failed
@@ -1018,14 +1139,15 @@ errlHndl_t FsiDD::handleOpbErrors(FsiAddrInfo_t& i_addrInfo,
return l_err; // just leave
}
- TRACFCOMP( g_trac_fsi, "FsiDD::handleOpbErrors> Error during FSI access to %.8X : relAddr=0x%X, absAddr=0x%X, OPB Status=0x%.8X", TARGETING::get_huid(i_addrInfo.fsiTarg), i_addrInfo.relAddr, i_addrInfo.absAddr, i_opbStatReg );
+ TRACFCOMP( g_trac_fsi, "FsiDD::handleOpbErrors> Error during FSI access to %.8X : relAddr=0x%X, absAddr=%.8X->%.6X, OPB Status=0x%.8X, l_opbErrorMask=%.8X", TARGETING::get_huid(i_addrInfo.fsiTarg), i_addrInfo.relAddr, TARGETING::get_huid(i_addrInfo.opbTarg), i_addrInfo.absAddr, i_opbStatReg, l_opbErrorMask );
/*@
* @errortype
* @moduleid FSI::MOD_FSIDD_HANDLEOPBERRORS
* @reasoncode FSI::RC_OPB_ERROR
- * @userdata1[0:31] Relative FSI Address
+ * @userdata1[00:31] Relative FSI Address
* @userdata1[32:63] Absolute FSI Address
- * @userdata2 OPB Status Register
+ * @userdata2[00:31] OPB Status Register
+ * @userdata2[32:63] FSI Master HUID
* @devdesc FsiDD::handleOpbErrors> Error during FSI access
*/
l_err = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_UNRECOVERABLE,
@@ -1034,10 +1156,11 @@ errlHndl_t FsiDD::handleOpbErrors(FsiAddrInfo_t& i_addrInfo,
TWO_UINT32_TO_UINT64(
i_addrInfo.relAddr,
i_addrInfo.absAddr),
- TWO_UINT32_TO_UINT64(i_opbStatReg,0));
+ TWO_UINT32_TO_UINT64(i_opbStatReg,
+ TARGETING::get_huid(i_addrInfo.opbTarg)));
//mask off the bits we're ignoring before looking closer
- uint32_t l_opb_stat = (i_opbStatReg & iv_opbErrorMask);
+ uint32_t l_opb_stat = (i_opbStatReg & l_opbErrorMask);
/*
OPB_errAck
@@ -1098,9 +1221,11 @@ errlHndl_t FsiDD::handleOpbErrors(FsiAddrInfo_t& i_addrInfo,
if( !root_cause_found )
{
// read the Status Bridge0 Register
+ FsiChipInfo_t fsi_info = getFsiInfo( i_addrInfo.fsiTarg );
+ uint64_t ctl_reg = getControlReg(fsi_info.type);
uint32_t mesrb0_data = 0;
tmp_err = read( i_addrInfo.accessInfo.master,
- FSI_MESRB0_1D0,
+ ctl_reg|FSI_MESRB0_1D0,
&mesrb0_data );
if( tmp_err )
{
@@ -1226,9 +1351,20 @@ errlHndl_t FsiDD::pollForComplete(FsiAddrInfo_t& i_addrInfo,
uint32_t* o_readData)
{
errlHndl_t l_err = NULL;
- enum { MAX_OPB_TIMEOUT_NS = 15*NS_PER_MSEC }; //=15ms
+ enum { MAX_OPB_TIMEOUT_NS = 10*NS_PER_MSEC }; //=10ms
do {
+ // Do not look at error bits for the Master we're not using
+ uint32_t l_opbErrorMask = iv_opbErrorMask;
+ if( i_addrInfo.accessInfo.type == TARGETING::FSI_MASTER_TYPE_CMFSI )
+ {
+ l_opbErrorMask &= ~OPB_STAT_ERR_MFSI;
+ }
+ else
+ {
+ l_opbErrorMask &= ~OPB_STAT_ERR_CMFSI;
+ }
+
// poll for complete
uint32_t read_data[2];
size_t scom_size = sizeof(uint64_t);
@@ -1260,7 +1396,7 @@ errlHndl_t FsiDD::pollForComplete(FsiAddrInfo_t& i_addrInfo,
// check for completion or error
TRACUCOMP(g_trac_fsi, "FsiDD::pollForComplete> ScomREAD : read_data[0]=%.8llX", read_data[0] );
if( ((read_data[0] & OPB_STAT_BUSY) == 0) //not busy
- || (read_data[0] & iv_opbErrorMask) ) //error bits
+ || (read_data[0] & l_opbErrorMask) ) //error bits
{
break;
}
@@ -1270,6 +1406,14 @@ errlHndl_t FsiDD::pollForComplete(FsiAddrInfo_t& i_addrInfo,
} while( elapsed_time_ns <= MAX_OPB_TIMEOUT_NS ); // hardware has 1ms limit
if( l_err ) { break; }
+ // check if we got an error from the OPB
+ // (will also check for busy/timeout)
+ l_err = handleOpbErrors( i_addrInfo, read_data[0] );
+ if( l_err )
+ {
+ break;
+ }
+
// we should never timeout because the hardware should set an error
if( elapsed_time_ns > MAX_OPB_TIMEOUT_NS )
{
@@ -1316,20 +1460,17 @@ errlHndl_t FsiDD::pollForComplete(FsiAddrInfo_t& i_addrInfo,
l_err,
i_addrInfo.opbTarg );
+ //Clear out the error indication so that we can
+ // do subsequent FSI operations
+ errlHndl_t tmp_err = errorCleanup( i_addrInfo, FSI::RC_OPB_ERROR );
+ if(tmp_err) { delete tmp_err; }
+
l_err->collectTrace(FSI_COMP_NAME);
l_err->collectTrace(FSIR_TRACE_BUF);
break;
}
- // check if we got an error from the OPB
- // (will also check for busy/timeout)
- l_err = handleOpbErrors( i_addrInfo, read_data[0] );
- if( l_err )
- {
- break;
- }
-
// read valid isn't on
if( o_readData ) // only check if we're doing a read
{
@@ -1537,6 +1678,8 @@ errlHndl_t FsiDD::genFullFsiAddr(FsiAddrInfo_t& io_addrInfo)
!(iv_master->getAttr<TARGETING::ATTR_MODEL>()
== TARGETING::MODEL_VENICE) ) //@fixme-RTC:35041
{
+ //use the local proc to drive the operation instead of
+ // going through the master proc indirectly
io_addrInfo.opbTarg = io_addrInfo.accessInfo.master;
// Note: no need to append the MFSI port since it is now local
}
@@ -1763,40 +1906,16 @@ errlHndl_t FsiDD::initMasterControl(TARGETING::Target* i_master,
l_err = genFullFsiAddr(addr_info);
if( l_err ) { break; }
+ // Ensure we don't have any errors before we even start
uint32_t scom_data[2] = {};
size_t scom_size = sizeof(scom_data);
-
- uint64_t opbaddr = genOpbScomAddr(addr_info,OPB_REG_RES);
- scom_data[0] = 0; scom_data[1] = 0;
- l_err = deviceOp( DeviceFW::WRITE,
- iv_master,
- scom_data,
- scom_size,
- DEVICE_XSCOM_ADDRESS(opbaddr) );
- if( l_err ) { break; }
-
- opbaddr = genOpbScomAddr(addr_info,OPB_REG_STAT);
- scom_data[0] = 0; scom_data[1] = 0;
- l_err = deviceOp( DeviceFW::WRITE,
- iv_master,
- scom_data,
- scom_size,
- DEVICE_XSCOM_ADDRESS(opbaddr) );
- if( l_err ) { break; }
-
- // Ensure we don't have any errors before we even start
- opbaddr = genOpbScomAddr(addr_info,OPB_REG_STAT);
+ uint64_t opbaddr = genOpbScomAddr(addr_info,OPB_REG_STAT);
l_err = deviceOp( DeviceFW::READ,
iv_master,
scom_data,
scom_size,
DEVICE_XSCOM_ADDRESS(opbaddr) );
if( l_err ) { break; }
- // Trace initial state for debug
- TRACFCOMP(g_trac_fsi,"Scom %0.8X = %0.8X %0.8X",
- opbaddr,
- scom_data[0],
- scom_data[1]);
l_err = handleOpbErrors( addr_info, scom_data[0] );
if( l_err )
{
@@ -1858,7 +1977,7 @@ errlHndl_t FsiDD::initMasterControl(TARGETING::Target* i_master,
databuf = 0x50040400;
//Setup timeout so that:
- // code(15ms) > masterproc (0.9ms) > remote fsi master (0.8ms)
+ // code(10ms) > masterproc (0.9ms) > remote fsi master (0.8ms)
if( i_master == iv_master )
{
// 26:27= Timeout (b01) = 0.9ms
@@ -2211,25 +2330,19 @@ errlHndl_t FsiDD::errorCleanup( FsiAddrInfo_t& i_addrInfo,
do {
if( FSI::RC_OPB_ERROR == i_errType )
{
- // Clear out OPB error
- uint64_t scomdata = 0;
- size_t scom_size = sizeof(uint64_t);
- l_err = deviceOp( DeviceFW::WRITE,
- i_addrInfo.opbTarg,
- &scomdata,
- scom_size,
- DEVICE_XSCOM_ADDRESS(0x00020001ull) );
+ //Clear out the pib2opb logic for the master
+ // that failed
+ l_err = resetPib2Opb( i_addrInfo.opbTarg );
if(l_err) break;
}
else if( FSI::RC_ERROR_IN_MAEB == i_errType )
{
- //Reset the port to clear up the residual errors
- // 1= Port: Error reset
- uint32_t data = 0x40000000;
- uint64_t mresp0_reg = getControlReg(i_addrInfo.accessInfo.type)
- | FSI_MRESP0_0D0
- | (i_addrInfo.accessInfo.port*4);
- l_err = write( mresp0_reg, &data );
+ //Reset the bridge to clear up the residual errors
+ // 0=Bridge: General reset
+ uint32_t data = 0x80000000;
+ uint64_t mesrb0_reg = getControlReg(i_addrInfo.accessInfo.type)
+ | FSI_MESRB0_1D0;
+ l_err = write( i_addrInfo.opbTarg, mesrb0_reg, &data );
if(l_err) break;
}
@@ -2260,9 +2373,9 @@ errlHndl_t FsiDD::checkForErrors( FsiAddrInfo_t& i_addrInfo )
{
errlHndl_t l_err = NULL;
- if( i_addrInfo.fsiTarg == iv_master )
+ if( i_addrInfo.fsiTarg == i_addrInfo.opbTarg )
{
- //nothing to check here in operations directed at master proc
+ //nothing to check here in operations directed at FSI Master
return NULL;
}
@@ -2278,7 +2391,7 @@ errlHndl_t FsiDD::checkForErrors( FsiAddrInfo_t& i_addrInfo )
l_err = read( i_addrInfo.accessInfo.master, maeb_reg, &maeb_data );
if( !l_err && (maeb_data != 0) )
{
- TRACFCOMP( g_trac_fsi, "FsiDD::read> Error after read of %.8X, MAEB=%lX", TARGETING::get_huid(i_addrInfo.fsiTarg), maeb_data );
+ TRACFCOMP( g_trac_fsi, "FsiDD::checkForErrors> After op to %.8X, MAEB=%lX (Master=%.8X)", TARGETING::get_huid(i_addrInfo.fsiTarg), maeb_data, TARGETING::get_huid(i_addrInfo.opbTarg) );
/*@
* @errortype
* @moduleid FSI::MOD_FSIDD_CHECKFORERRORS
@@ -2312,6 +2425,8 @@ errlHndl_t FsiDD::checkForErrors( FsiAddrInfo_t& i_addrInfo )
//Reset the port to clean up residual errors
errorCleanup(i_addrInfo,FSI::RC_ERROR_IN_MAEB);
}
+
+ iv_ffdcTask = 0;
}
return l_err;
diff --git a/src/usr/fsi/fsidd.H b/src/usr/fsi/fsidd.H
index 755330b33..99150c277 100644
--- a/src/usr/fsi/fsidd.H
+++ b/src/usr/fsi/fsidd.H
@@ -87,7 +87,7 @@ class FsiDD
* @param[in] i_type FSI Master Type (MFSI or cMFSI)
* @param[in] i_port Slave port number
* @param[out] o_detected Bitstring of detected slaves
- *
+ *
* @return bool true if port sensed as active during FSI initialization
*/
bool isSlavePresent( TARGETING::Target* i_fsiMaster,
@@ -100,7 +100,7 @@ class FsiDD
*
* @param[in] i_target
* @param[out] o_detected Bitstring of detected slaves
- *
+ *
* @return bool true if port sensed as active during FSI initialization
*/
bool isSlavePresent( TARGETING::Target* i_target,
@@ -121,6 +121,15 @@ class FsiDD
errlHndl_t &io_log,
TARGETING::Target* i_target );
+ /**
+ * @brief Cleanup the FSI PIB2OPB logic on the procs
+ *
+ * @param[in] i_target Proc Chip Target to reset
+ *
+ * @return errlHndl_t NULL on success
+ */
+ errlHndl_t resetPib2Opb( TARGETING::Target* i_target );
+
protected:
/**
* @brief Constructor
@@ -391,7 +400,7 @@ class FsiDD
OPB_STAT_BUSY = 0x00010000, /**< Bit 15 is the Busy bit */
OPB_STAT_READ_VALID = 0x00020000, /**< Bit 14 is the Valid Read bit */
OPB_STAT_ERRACK = 0x00100000, /**< 11 is OPB errAck */
- OPB_STAT_ERR_OPB = 0x09F00000, /**< 4,7-11 are OPB errors */
+ OPB_STAT_ERR_OPB = 0xFFFC0000, /**< 0-14 are OPB errors */
OPB_STAT_ERR_CMFSI = 0x0000FC00, /**< 16-21 are cMFSI errors */
OPB_STAT_ERR_MFSI = 0x000000FC, /**< 24-29 are MFSI errors */
OPB_STAT_ERR_ANY = (OPB_STAT_ERR_OPB |
@@ -529,7 +538,7 @@ class FsiDD
* Active slaves, 1 bit per port, 1=active,
* one entry per MFSI port, plus local MFSI and local cMFSI
*/
- uint8_t iv_slaves[MAX_SLAVE_PORTS+2];
+ uint8_t iv_slaves[MAX_SLAVE_PORTS+2];
/**
* Master processor target
@@ -546,6 +555,11 @@ class FsiDD
*/
uint32_t iv_opbErrorMask;
+ /**
+ * Last OPB Command
+ */
+ uint64_t iv_lastOpbCmd;
+
private:
// let my testcase poke around
diff --git a/src/usr/fsiscom/fsiscom.C b/src/usr/fsiscom/fsiscom.C
index ea3631d05..4d390360a 100644
--- a/src/usr/fsiscom/fsiscom.C
+++ b/src/usr/fsiscom/fsiscom.C
@@ -67,19 +67,49 @@ void pib_error_handler( TARGETING::Target* i_target,
//Add this target to the FFDC
ERRORLOG::ErrlUserDetailsTarget(i_target,"SCOM Target").addToLog(i_errlog);
- //Add the callouts for the specific PCB/PIB error
- uint32_t pib_error = i_status >> 12;
- PIB::addFruCallouts( i_target,
- pib_error,
- i_errlog );
-
- //Grab the PIB2OPB Status reg for a Resource Occupied error
- if( pib_error == PIB::PIB_RESOURCE_OCCUPIED ) //piberr=001
+ //Look for a totally dead chip
+ if( i_status == 0xFFFFFFFF )
{
+ // if things are this broken then chances are there are bigger
+ // problems, we can just make some guesses on what to call out
+
+ // make code the highest since there are other issues
+ i_errlog->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // callout this chip as Low priority and deconfigure it
+ i_errlog->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::DECONFIG,
+ HWAS::GARD_NULL );
+
+ // grab all the FFDC we can think of
+ FSI::getFsiFFDC( FSI::FFDC_OPB_FAIL_SLAVE,
+ i_errlog,
+ i_target );
+ FSI::getFsiFFDC( FSI::FFDC_READWRITE_FAIL,
+ i_errlog,
+ i_target );
FSI::getFsiFFDC( FSI::FFDC_PIB_FAIL,
i_errlog,
i_target );
}
+ else
+ {
+ //Add the callouts for the specific PCB/PIB error
+ uint32_t pib_error = i_status >> 12;
+ PIB::addFruCallouts( i_target,
+ pib_error,
+ i_errlog );
+
+ //Grab the PIB2OPB Status reg for a Resource Occupied error
+ if( pib_error == PIB::PIB_RESOURCE_OCCUPIED ) //piberr=001
+ {
+ FSI::getFsiFFDC( FSI::FFDC_PIB_FAIL,
+ i_errlog,
+ i_target );
+ }
+ }
//Recovery sequence from Markus
// if SCOM fails and FSI Master displays "MasterTimeOut"
diff --git a/src/usr/hwas/hwasPlat.C b/src/usr/hwas/hwasPlat.C
index f690dddd6..4a80846ed 100644
--- a/src/usr/hwas/hwasPlat.C
+++ b/src/usr/hwas/hwasPlat.C
@@ -42,6 +42,7 @@
#include <hwas/common/hwas_reasoncodes.H>
#include <targeting/common/utilFilter.H>
+#include <fsi/fsiif.H>
namespace HWAS
{
@@ -95,6 +96,52 @@ errlHndl_t platReadIDEC(const TargetHandle_t &i_target)
DEVICE_FSI_ADDRESS(0x01028));
}
+ //Look for a totally dead chip
+ if( (errl == NULL)
+ && ((id_ec & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000) )
+ {
+ HWAS_ERR("All FFs for chipid read on %.8X",TARGETING::get_huid(i_target));
+ /*@
+ * @errortype
+ * @moduleid HWAS::MOD_PLAT_READIDEC
+ * @reasoncode HWAS::RC_BAD_CHIPID
+ * @userdata1 Target HUID
+ * @userdata2 <unused>
+ * @devdesc platReadIDEC> Invalid chipid from hardware (all FFs)
+ */
+ errl = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE,
+ HWAS::MOD_PLAT_READIDEC,
+ HWAS::RC_BAD_CHIPID,
+ TARGETING::get_huid(i_target),
+ 0);
+
+ // if things are this broken then chances are there are bigger
+ // problems, we can just make some guesses on what to call out
+
+ // make code the highest since there are other issues
+ errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // callout this chip as Low priority and deconfigure it
+ errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::DECONFIG,
+ HWAS::GARD_NULL );
+
+ // Grab all the FFDC we can think of
+ FSI::getFsiFFDC( FSI::FFDC_OPB_FAIL_SLAVE,
+ errl,
+ i_target );
+ FSI::getFsiFFDC( FSI::FFDC_READWRITE_FAIL,
+ errl,
+ i_target );
+ FSI::getFsiFFDC( FSI::FFDC_PIB_FAIL,
+ errl,
+ i_target );
+
+ }
+
if (errl == NULL)
{ // no error, so we got a valid ID/EC value back
// EC - nibbles 0,2
diff --git a/src/usr/hwpf/hwp/activate_powerbus/activate_powerbus.C b/src/usr/hwpf/hwp/activate_powerbus/activate_powerbus.C
index e7b3385dc..fc7a69136 100644
--- a/src/usr/hwpf/hwp/activate_powerbus/activate_powerbus.C
+++ b/src/usr/hwpf/hwp/activate_powerbus/activate_powerbus.C
@@ -59,6 +59,7 @@
#include "proc_build_smp/proc_build_smp.H"
#include <intr/interrupt.H>
+#include <fsi/fsiif.H>
namespace ACTIVATE_POWERBUS
{
@@ -286,6 +287,20 @@ void* call_proc_build_smp( void *io_pArgs )
// Turn off FSI scom and turn on Xscom.
l_proc_target->setAttr<ATTR_SCOM_SWITCHES>(l_switches);
+
+ // Reset the FSI2OPB logic on the new chips
+ l_errl = FSI::resetPib2Opb(l_proc_target);
+ if(l_errl)
+ {
+ TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
+ "ERROR : resetPib2Opb on %.8X",
+ TARGETING::get_huid(l_proc_target));
+ // Create IStep error log and cross reference error that occurred
+ l_StepError.addErrorDetails(l_errl);
+ // Commit error
+ errlCommit( l_errl, HWPF_COMP_ID );
+ break;
+ }
}
}
diff --git a/src/usr/pore/poreve/porevesrc/pib2cfam.C b/src/usr/pore/poreve/porevesrc/pib2cfam.C
index cfd19d455..7e8fe4d9b 100644
--- a/src/usr/pore/poreve/porevesrc/pib2cfam.C
+++ b/src/usr/pore/poreve/porevesrc/pib2cfam.C
@@ -5,7 +5,7 @@
/* */
/* IBM CONFIDENTIAL */
/* */
-/* COPYRIGHT International Business Machines Corp. 2012,2013 */
+/* COPYRIGHT International Business Machines Corp. 2012,2014 */
/* */
/* p1 */
/* */
@@ -101,6 +101,8 @@ Pib2Cfam::operation(Transaction& io_transaction)
me = ME_SUCCESS;
} else {
me = ME_FAILURE;
+ //@todo CQ:SW248690 - need a better way to catch these
+ fapiLogError( rc, fapi::FAPI_ERRL_SEV_UNRECOVERABLE );
}
break;
default:
@@ -131,6 +133,8 @@ Pib2Cfam::operation(Transaction& io_transaction)
me = ME_SUCCESS;
} else {
me = ME_FAILURE;
+ //@todo CQ:SW248690 - need a better way to catch these
+ fapiLogError( rc, fapi::FAPI_ERRL_SEV_UNRECOVERABLE );
}
break;
OpenPOWER on IntegriCloud