summary refs log tree commit diff stats
path: root/src/usr/scom/scom.C
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/scom/scom.C')
-rw-r--r-- src/usr/scom/scom.C 212
1 files changed, 162 insertions, 50 deletions
diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C
index fb766f50d..b52c33ba0 100644
--- a/src/usr/scom/scom.C
+++ b/src/usr/scom/scom.C
@@ -803,7 +803,7 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
//Add some additional FFDC based on the specific operation
if( l_err )
{
- //TODO for P9 RTC 167311 addScomFailFFDC( l_err, i_target, i_addr );
+ addScomFailFFDC( l_err, i_target, i_addr );
}
return l_err;
@@ -813,104 +813,216 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
void addScomFailFFDC( errlHndl_t i_err,
- TARGETING::Target* i_target,
+ TARGETING::Target* i_chipTarg,
uint64_t i_addr )
{
// Read some error regs from scom
- ERRORLOG::ErrlUserDetailsLogRegister l_scom_data(i_target);
+ ERRORLOG::ErrlUserDetailsLogRegister l_scom_data(i_chipTarg);
bool addit = false;
TARGETING::TYPE l_type = TARGETING::TYPE_NA;
- if( i_target == TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL )
+ uint32_t l_badChiplet = 0x00;
+
+ static bool l_insideFFDC = false;
+ if( l_insideFFDC )
+ {
+ TRACDCOMP( g_trac_scom, "Already gathering FFDC..." );
+ return;
+ }
+ l_insideFFDC = true;
+
+ if( i_chipTarg == TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL )
{
l_type = TARGETING::TYPE_PROC;
}
else
{
- l_type = i_target->getAttr<TARGETING::ATTR_TYPE>();
+ l_type = i_chipTarg->getAttr<TARGETING::ATTR_TYPE>();
}
- //PBA scoms on the processor
- if( ((i_addr & 0xFFFFF000) == 0x00064000)
+ //Multicast scoms on the processor
+ if( p9_scom_addr(i_addr).is_multicast()
&& (TARGETING::TYPE_PROC == l_type) )
{
addit = true;
- //look for hung operations on the PBA
- uint64_t ffdc_regs[] = {
- //grab the PBA buffers in case something is hung
- 0x02010850, //PBARBUFVAL0
- 0x02010851, //PBARBUFVAL1
- 0x02010852, //PBARBUFVAL2
- 0x02010858, //PBAWBUFVAL0
- 0x02010859, //PBAWBUFVAL1
+ uint64_t ffdc_regs1[] = {
+ 0x000F001E, // PCBMS.FIRST_ERR_REG
+ 0x000F001F, // PCBMS.ERROR_REG
+ };
+ for( size_t x = 0;
+ x < (sizeof(ffdc_regs1)/sizeof(ffdc_regs1[0]));
+ x++ )
+ {
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(ffdc_regs1[x]));
+ }
- 0x020F0012, //PB_GP3 (has fence information)
+ uint64_t ffdc_regs2[] = {
+ 0x000F0011, // PCBMS.REC_ERR_REG0
+ 0x000F0012, // PCBMS.REC_ERR_REG1
+ 0x000F0013, // PCBMS.REC_ERR_REG2
+ 0x000F0014, // PCBMS.REC_ERR_REG3
};
- for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+
+ // save off the responses to figure out which chiplet failed
+ uint8_t l_responses[(sizeof(ffdc_regs2)/sizeof(ffdc_regs2[0]))
+ *sizeof(uint64_t)];
+ uint8_t* l_respPtr = l_responses;
+
+ for( size_t x = 0;
+ x < (sizeof(ffdc_regs2)/sizeof(ffdc_regs2[0]));
+ x++ )
{
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(ffdc_regs[x]));
+ // going to read these manually because we want to look at the data
+ uint64_t l_scomdata = 0;
+ size_t l_scomsize = sizeof(l_scomdata);
+ errlHndl_t l_ignored = doScomOp( DeviceFW::READ,
+ i_chipTarg,
+ &l_scomdata,
+ l_scomsize,
+ DeviceFW::SCOM,
+ ffdc_regs2[x] );
+ if( l_ignored )
+ {
+ delete l_ignored;
+ l_scomdata = 0;
+ }
+ else
+ {
+ l_scom_data.addDataBuffer( &l_scomdata,
+ l_scomsize,
+ DEVICE_SCOM_ADDRESS(ffdc_regs2[x]) );
+ }
+
+ // copy the error data into our big buffer
+ memcpy( l_respPtr, &l_scomdata, l_scomsize );
+ l_respPtr += l_scomsize; // move to the next chunk
}
- }
- //EX scoms on the processor (not including PCB slave regs)
- else if( ((i_addr & 0xF0000000) == 0x10000000)
- && ((i_addr & 0x00FF0000) != 0x000F0000)
- && (TARGETING::TYPE_PROC == l_type) )
- {
- addit = true;
- uint64_t ex_offset = 0xFF000000 & i_addr;
- //grab some data related to the PCB slave state
- uint64_t ffdc_regs[] = {
- 0x0F010B, //Special Wakeup
- 0x0F0012, //GP3
- 0x0F0100, //PowerManagement GP0
- 0x0F0106, //PFET Status Core
- 0x0F010E, //PFET Status ECO
- 0x0F0111, //PM State History
+
+ // find the bad chiplet
+ // 4-bits per chiplet : 1-bit response, 3-bit error code
+ for( size_t x = 0; x < sizeof(l_responses); x++ )
+ {
+ // look for a non-zero pib error code (the loop has no break, so the last matching byte wins)
+ if( l_responses[x] & 0x70 ) //front nibble
+ {
+ l_badChiplet = x*2;
+ }
+ else if( l_responses[x] & 0x07 ) //back nibble
+ {
+ l_badChiplet = x*2 + 1;
+ }
+ }
+
+ uint64_t ffdc_regs3[] = {
+ 0x0F0001, // multicast group1
+ 0x0F0002, // multicast group2
+ 0x0F0003, // multicast group3
+ 0x0F0004, // multicast group4
};
- for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ for( size_t x = 0;
+ x < (sizeof(ffdc_regs3)/sizeof(ffdc_regs3[0]));
+ x++ )
{
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(ex_offset|ffdc_regs[x]));
+ p9_scom_addr l_scom(ffdc_regs3[x]);
+ l_scom.set_chiplet_id(l_badChiplet);
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(l_scom.get_addr()));
}
}
//Any non-PCB Slave and non TP reg on the processor
- if( ((i_addr & 0x00FF0000) != 0x000F0000)
- && ((i_addr & 0xFF000000) != 0x00000000)
+ if( ((i_addr & 0x00FF0000) != 0x000F0000) //PCB slave
+ && (p9_scom_addr(i_addr).get_chiplet_id() != 0x00) //TP
&& (TARGETING::TYPE_PROC == l_type) )
{
addit = true;
- uint64_t chiplet_offset = 0xFF000000 & i_addr;
+ if( l_badChiplet == 0x00 )
+ {
+ l_badChiplet = p9_scom_addr(i_addr).get_chiplet_id();
+ }
//grab some data related to the PCB slave state
uint64_t ffdc_regs[] = {
- 0x0F0012, //GP3
- 0x0F001F, //Error capture reg
+ 0x0F001F, // PCBSL<cplt>.ERROR_REG
+ 0x03000F, // CC.<chiplet>.ERROR_STATUS
+ 0x010001, // <chiplet>.PSC.PSCOM_STATUS_ERROR_REG
+ 0x010002, // <chiplet>.PSC.PSCOM_ERROR_MASK
};
for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
{
- l_scom_data.addData( DEVICE_SCOM_ADDRESS(
- chiplet_offset|ffdc_regs[x]) );
+ p9_scom_addr l_scom(ffdc_regs[x]);
+ l_scom.set_chiplet_id(l_badChiplet);
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(l_scom.get_addr()));
}
- //grab the clock/osc regs
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x00050019));
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x0005001A));
+ //Osc Switch Sense 1 register
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x0005001D));
+ //Osc Switch Sense 2 register
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x0005001E));
//grab the clock regs via FSI too, just in case
- if (i_target != TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL)
+ if (i_chipTarg != TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL)
{
TARGETING::Target* mproc = NULL;
TARGETING::targetService().masterProcChipTargetHandle(mproc);
- if (i_target != mproc)
+ if (i_chipTarg != mproc)
{
- l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2864));//==2819
- l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2868));//==281A
+ l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2874));//==281D
+ l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2878));//==281E
}
}
}
+ //PBA scoms on the processor
+ if( ((i_addr & 0xFFFFF000) == 0x00068000)
+ && (TARGETING::TYPE_PROC == l_type) )
+ {
+ addit = true;
+ //look for hung operations on the PBA
+ uint64_t ffdc_regs[] = {
+ //grab the PBA buffers in case something is hung
+ 0x05012850, //PBARBUFVAL0
+ 0x05012851, //PBARBUFVAL1
+ 0x05012852, //PBARBUFVAL2
+ 0x05012853, //PBARBUFVAL3
+ 0x05012854, //PBARBUFVAL4
+ 0x05012855, //PBARBUFVAL5
+ 0x05012858, //PBAWBUFVAL0
+ 0x05012859, //PBAWBUFVAL1
+ };
+ for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ {
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(ffdc_regs[x]));
+ }
+ }
+ //Core/EX/EQ scoms on the processor (not including PCB slave regs)
+ else if( (((i_addr & 0xF0000000) == 0x10000000) //CACHE
+ || ((i_addr & 0xF0000000) == 0x20000000)) //CORE
+ && ((i_addr & 0x00FF0000) != 0x000F0000) //PCB slave
+ && (TARGETING::TYPE_PROC == l_type) )
+ {
+ addit = true;
+ uint8_t l_badChiplet = p9_scom_addr(i_addr).get_chiplet_id();
+ //grab some data related to the PCB slave state
+ uint64_t ffdc_regs[] = {
+ 0x0F010A, //Special Wakeup Other
+ 0x0F010B, //Special Wakeup FSP
+ 0x0F010C, //Special Wakeup OCC
+ 0x0F010D, //Special Wakeup HYP
+ 0x0F0111, //PM State History FSP
+ };
+ for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ {
+ p9_scom_addr l_scom(ffdc_regs[x]);
+ l_scom.set_chiplet_id(l_badChiplet);
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(l_scom.get_addr()));
+ }
+ }
+
+
if( addit )
{
l_scom_data.addToLog(i_err);
}
+
+ l_insideFFDC = false;
}
OpenPOWER on IntegriCloud