summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dan Crowell <dcrowell@us.ibm.com> 2014-04-10 09:16:03 -0500
committer A. Patrick Williams III <iawillia@us.ibm.com> 2014-06-23 15:24:15 -0500
commit 02c81ca3e1e06b910c58905bcab913d30b0a5337 (patch)
tree d301ae5056413e37daf15c3dc8cbae8ac1b74c45
parent 0ea3be7d289e8e19aae6a426b65ec91a925ea11a (diff)
download blackbird-hostboot-02c81ca3e1e06b910c58905bcab913d30b0a5337.tar.gz
blackbird-hostboot-02c81ca3e1e06b910c58905bcab913d30b0a5337.zip
More FFDC for SCOM Fails
Added more address-specific FFDC collection logic to handle more specific error scenarios we've seen or talked about.

Change-Id: Ifcc2b98b9c55ed5e6a35d1556cd530438ec120c2
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/11257
Tested-by: Jenkins Server
Reviewed-by: Michael Baiocchi <baiocchi@us.ibm.com>
Reviewed-by: Douglas R. Gilbert <dgilbert@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
-rw-r--r-- src/include/usr/xscom/piberror.H  5
-rw-r--r-- src/usr/fsiscom/fsiscom.C        17
-rw-r--r-- src/usr/ibscom/ibscom.C           1
-rw-r--r-- src/usr/scom/scom.C              71
-rw-r--r-- src/usr/xscom/piberror.C         44
-rw-r--r-- src/usr/xscom/xscom.C             1
6 files changed, 108 insertions, 31 deletions
diff --git a/src/include/usr/xscom/piberror.H b/src/include/usr/xscom/piberror.H
index 9836d9acd..5640c6ce1 100644
--- a/src/include/usr/xscom/piberror.H
+++ b/src/include/usr/xscom/piberror.H
@@ -50,12 +50,15 @@ namespace PIB
*
* @param[in] i_target Operation target
* @param[in] i_pibErrStatus Error Status bits retrieved
+ * @param[in] i_scomAddr Address of SCOM operation that led to the fail,
+ * if operation was not a scom, set to UINT64_MAX
* @param[in/out] io_errl Originating errorlog that we will add Fru
* Callouts to.
* @return none
*/
void addFruCallouts(TARGETING::Target* i_target,
- uint32_t i_pibErrStatus,
+ uint32_t i_pibErrStatus,
+ uint64_t i_scomAddr,
errlHndl_t& io_errl);
diff --git a/src/usr/fsiscom/fsiscom.C b/src/usr/fsiscom/fsiscom.C
index 962cfd5ab..aade95479 100644
--- a/src/usr/fsiscom/fsiscom.C
+++ b/src/usr/fsiscom/fsiscom.C
@@ -60,9 +60,19 @@ union ioData6432
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
+/**
+ * @brief Common function to add callouts and FFDC and recover
+ * from PIB errors
+ *
+ * @param[in] i_target SCom target
+ * @param[in] i_errlog Error log to append to
+ * @param[in] i_status FSI2PIB status register
+ * @param[in] i_scomAddr Address that we failed on
+ */
void pib_error_handler( TARGETING::Target* i_target,
errlHndl_t i_errlog,
- uint32_t i_status )
+ uint32_t i_status,
+ uint32_t i_scomAddr )
{
//Add this target to the FFDC
ERRORLOG::ErrlUserDetailsTarget(i_target,"SCOM Target").addToLog(i_errlog);
@@ -100,6 +110,7 @@ void pib_error_handler( TARGETING::Target* i_target,
uint32_t pib_error = i_status >> 12;
PIB::addFruCallouts( i_target,
pib_error,
+ i_scomAddr,
i_errlog );
//Grab the PIB2OPB Status reg for a Resource Occupied error
@@ -294,7 +305,7 @@ errlHndl_t fsiScomPerformOp(DeviceFW::OperationType i_opType,
l_status));
// call common error handler to do callouts and recovery
- pib_error_handler( i_target, l_err, l_status );
+ pib_error_handler( i_target, l_err, l_status, l_scomAddr );
//Grab the PIB2OPB Status reg for a XSCOM Block error
if( (l_status & 0x00007000) == 0x00001000 ) //piberr=001
@@ -381,7 +392,7 @@ errlHndl_t fsiScomPerformOp(DeviceFW::OperationType i_opType,
l_status));
// call common error handler to do callouts and recovery
- pib_error_handler( i_target, l_err, l_status );
+ pib_error_handler( i_target, l_err, l_status, l_scomAddr );
break;
}
diff --git a/src/usr/ibscom/ibscom.C b/src/usr/ibscom/ibscom.C
index 25b4492d2..405708ee2 100644
--- a/src/usr/ibscom/ibscom.C
+++ b/src/usr/ibscom/ibscom.C
@@ -811,6 +811,7 @@ errlHndl_t doIBScom(DeviceFW::OperationType i_opType,
//add callouts based on the PIB error
PIB::addFruCallouts( i_target,
mbsiberr0.piberr,
+ i_addr,
l_err );
//grab some HW regs via FSISCOM
diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C
index e11736f62..6cded14e5 100644
--- a/src/usr/scom/scom.C
+++ b/src/usr/scom/scom.C
@@ -57,8 +57,8 @@ namespace SCOM
* @param[in] i_target Target of SCOM operation
* @param[in] i_addr SCOM address
*/
-void addScomFailFFFDC( errlHndl_t i_err,
- TARGETING::Target* i_target,
+void addScomFailFFDC( errlHndl_t i_err,
+ TARGETING::Target* i_target,
uint64_t i_addr );
@@ -310,6 +310,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
//Add the callouts for the specific PCB/PIB error
PIB::addFruCallouts( i_target,
scomout.piberr,
+ i_addr,
l_err );
//Add this target to the FFDC
@@ -426,6 +427,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType,
//Add the callouts for the specific PCB/PIB error
PIB::addFruCallouts( i_target,
scomout.piberr,
+ i_addr,
l_err );
//Add this target to the FFDC
@@ -553,7 +555,7 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
//Add some additional FFDC based on the specific operation
if( l_err )
{
- addScomFailFFFDC( l_err, i_target, i_addr );
+ addScomFailFFDC( l_err, i_target, i_addr );
}
return l_err;
@@ -562,18 +564,18 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
-void addScomFailFFFDC( errlHndl_t i_err,
- TARGETING::Target* i_target,
- uint64_t i_addr )
+void addScomFailFFDC( errlHndl_t i_err,
+ TARGETING::Target* i_target,
+ uint64_t i_addr )
{
// Read some error regs from scom
ERRORLOG::ErrlUserDetailsLogRegister l_scom_data(i_target);
bool addit = false;
+ TARGETING::TYPE l_type = i_target->getAttr<TARGETING::ATTR_TYPE>();
//PBA scoms on the processor
if( ((i_addr & 0xFFFFF000) == 0x00064000)
- && (TARGETING::TYPE_PROC
- == i_target->getAttr<TARGETING::ATTR_TYPE>()) )
+ && (TARGETING::TYPE_PROC == l_type) )
{
addit = true;
//look for hung operations on the PBA
@@ -592,6 +594,59 @@ void addScomFailFFFDC( errlHndl_t i_err,
l_scom_data.addData(DEVICE_SCOM_ADDRESS(ffdc_regs[x]));
}
}
+ //EX scoms on the processor (not including PCB slave regs)
+ else if( ((i_addr & 0xF0000000) == 0x10000000)
+ && ((i_addr & 0x00FF0000) != 0x000F0000)
+ && (TARGETING::TYPE_PROC == l_type) )
+ {
+ addit = true;
+ uint64_t ex_offset = 0xFF000000 & i_addr;
+ //grab some data related to the PCB slave state
+ uint64_t ffdc_regs[] = {
+ 0x0F010B, //Special Wakeup
+ 0x0F0012, //GP3
+ 0x0F0100, //PowerManagement GP0
+ 0x0F0106, //PFET Status Core
+ 0x0F010E, //PFET Status ECO
+ 0x0F0111, //PM State History
+ };
+ for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ {
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(ex_offset|ffdc_regs[x]));
+ }
+ }
+
+ //Any non-PCB Slave and non TP reg on the processor
+ if( ((i_addr & 0x00FF0000) != 0x000F0000)
+ && ((i_addr & 0xFF000000) != 0x00000000)
+ && (TARGETING::TYPE_PROC == l_type) )
+ {
+ addit = true;
+ uint64_t chiplet_offset = 0xFF000000 & i_addr;
+ //grab some data related to the PCB slave state
+ uint64_t ffdc_regs[] = {
+ 0x0F0012, //GP3
+ 0x0F001F, //Error capture reg
+ };
+ for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ {
+ l_scom_data.addData( DEVICE_SCOM_ADDRESS(
+ chiplet_offset|ffdc_regs[x]) );
+ }
+
+ //grab the clock/osc regs
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x00050019));
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x0005001A));
+ //grab the clock regs via FSI too, just in case
+ TARGETING::Target* mproc = NULL;
+ TARGETING::targetService().masterProcChipTargetHandle(mproc);
+ if( (i_target != TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL)
+ && (i_target != mproc) )
+ {
+ l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2864));//==2819
+ l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2868));//==281A
+ }
+ }
if( addit )
{
diff --git a/src/usr/xscom/piberror.C b/src/usr/xscom/piberror.C
index aa769aea5..e879cdc29 100644
--- a/src/usr/xscom/piberror.C
+++ b/src/usr/xscom/piberror.C
@@ -39,20 +39,15 @@ namespace PIB
/**
* @brief Add callouts to an errorlog based on the type of PIB error could be a
* hardware or procedure callout
- *
- * @param[in] i_target Operation target
- * @param[in] i_pibErrStatus Error Status bits retrieved
- * @param[in/out] io_errl Originating errorlog that we will add Fru
- * Callouts to.
- * @return none
*/
void addFruCallouts(TARGETING::Target* i_target,
uint32_t i_pibErrStatus,
+ uint64_t i_scomAddr,
errlHndl_t& io_errl)
{
switch (i_pibErrStatus)
{
- case PIB::PIB_CHIPLET_OFFLINE:
+ case PIB::PIB_CHIPLET_OFFLINE: //b010
//Offline should just be a code bug, but it seems that there are
// cases where bad hardware can also cause this problem
//Since we assume code is good before going out, make the
@@ -65,12 +60,12 @@ void addFruCallouts(TARGETING::Target* i_target,
HWAS::SRCI_PRIORITY_MED);
break;
- case PIB::PIB_PARTIAL_GOOD:
+ case PIB::PIB_PARTIAL_GOOD: //b011
io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
HWAS::SRCI_PRIORITY_HIGH);
break;
- case PIB::PIB_INVALID_ADDRESS:
+ case PIB::PIB_INVALID_ADDRESS: //b100
//Invalid Address should just be a code bug, but it seems that there
// are cases where bad hardware can also cause this problem
//Since we assume code is good before going out, make the
@@ -83,28 +78,40 @@ void addFruCallouts(TARGETING::Target* i_target,
HWAS::SRCI_PRIORITY_MED);
break;
- case PIB::PIB_PARITY_ERROR:
- case PIB::PIB_TIMEOUT:
+ case PIB::PIB_PARITY_ERROR: //b110
+ case PIB::PIB_TIMEOUT: //b111
io_errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL );
break;
- case PIB::PIB_CLOCK_ERROR:
+ case PIB::PIB_CLOCK_ERROR: //b101
if (i_target->getAttr<TARGETING::ATTR_TYPE>() ==
TARGETING::TYPE_PROC)
{
- io_errl->addClockCallout(i_target,
- HWAS::OSCREFCLK_TYPE,
- HWAS::SRCI_PRIORITY_LOW);
+ //check for PCI range
+ if( ((i_scomAddr & 0xFF000000) == 0x09000000)
+ && ((i_scomAddr & 0x00FF0000) != 0x000F0000) )
+ {
+ io_errl->addClockCallout(i_target,
+ HWAS::OSCPCICLK_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
+ //for everything else blame the ref clock
+ else
+ {
+ io_errl->addClockCallout(i_target,
+ HWAS::OSCREFCLK_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
}
else if (i_target->getAttr<TARGETING::ATTR_TYPE>() ==
TARGETING::TYPE_MEMBUF)
{
io_errl->addClockCallout(i_target,
- HWAS::MEMCLK_TYPE,
- HWAS::SRCI_PRIORITY_LOW);
+ HWAS::MEMCLK_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
}
else // for anything else, just blame the refclock
{
@@ -115,8 +122,7 @@ void addFruCallouts(TARGETING::Target* i_target,
break;
default:
- // should never commit a log that gets here so that is a
- // code bug
+ // Anything else would most likely be a code bug
io_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
HWAS::SRCI_PRIORITY_HIGH);
break;
diff --git a/src/usr/xscom/xscom.C b/src/usr/xscom/xscom.C
index 1b5b06558..30df917da 100644
--- a/src/usr/xscom/xscom.C
+++ b/src/usr/xscom/xscom.C
@@ -794,6 +794,7 @@ errlHndl_t xscomPerformOp(DeviceFW::OperationType i_opType,
// Add Callouts to the errorlog
PIB::addFruCallouts(i_target,
l_hmer.mXSComStatus,
+ l_addr,
l_err);
// Call XscomCollectFFDC..
OpenPOWER on IntegriCloud