summary refs log tree commit diff stats
path: root/src/usr/scom
diff options
context:
space:
mode:
author Dan Crowell <dcrowell@us.ibm.com> 2017-02-22 14:50:21 -0600
committer Daniel M. Crowell <dcrowell@us.ibm.com> 2017-03-13 16:11:42 -0400
commit 21cd4b1c1d176338fac5015c235566e3ff10ab2b (patch)
tree a7d1d2d051b5db861a56777f44378e67939c7e88 /src/usr/scom
parent 86bae1c698cfef64d050b915654d9f3e03fb9ae3 (diff)
download talos-hostboot-21cd4b1c1d176338fac5015c235566e3ff10ab2b.tar.gz
talos-hostboot-21cd4b1c1d176338fac5015c235566e3ff10ab2b.zip
Fill in P9-specific scom error handling
Adjusted address-specific register gathering to reflect P9 chip logic.
Fixed some error handling bugs in the testcases.
Added verbosity to error log parser.

Change-Id: Iad274e8333adb32deacffd3cb92e40f11c48f884
RTC: 158541
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/37122
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/scom')
-rw-r--r-- src/usr/scom/plugins/errludP_scom.H 35
-rw-r--r-- src/usr/scom/scom.C 212
-rw-r--r-- src/usr/scom/test/scomtest.H 26
3 files changed, 198 insertions, 75 deletions
diff --git a/src/usr/scom/plugins/errludP_scom.H b/src/usr/scom/plugins/errludP_scom.H
index 6a819a4ec..f3a704cf6 100644
--- a/src/usr/scom/plugins/errludP_scom.H
+++ b/src/usr/scom/plugins/errludP_scom.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016 */
+/* Contributors Listed Below - COPYRIGHT 2016,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -73,17 +73,30 @@ namespace SCOM
ErrlUsrParser & i_parser,
void * i_pBuffer,
const uint32_t i_buflen) const
- {
- char* l_databuf = static_cast<char*>(i_pBuffer);
- i_parser.PrintHeading("SCOM PIB ERR");
+ {
+ char* l_databuf = static_cast<char*>(i_pBuffer);
+ i_parser.PrintHeading("SCOM PIB ERR");
- //***** Memory Layout *****
- // 1 bytes : Pib Error
+ //***** Memory Layout *****
+ // 1 bytes : Pib Error
- i_parser.PrintNumber("Pib Err","%.2lX",
- TO_UINT8(l_databuf));
-
- }
+ const char* l_decodeStr = "Unknown";
+ uint8_t l_piberr = TO_UINT8(l_databuf);
+ switch(l_piberr)
+ {
+ case(0): l_decodeStr = "None"; break;
+ case(1): l_decodeStr = "Resource Occupied"; break;
+ case(2): l_decodeStr = "Chiplet Offline"; break;
+ case(3): l_decodeStr = "Partial Good"; break;
+ case(4): l_decodeStr = "Invalid Address"; break;
+ case(5): l_decodeStr = "Clock Error"; break;
+ case(6): l_decodeStr = "Parity Error"; break;
+ case(7): l_decodeStr = "Timeout"; break;
+ }
+ char l_outputStr[30];
+ sprintf( l_outputStr, "%d (%s)", l_piberr, l_decodeStr );
+ i_parser.PrintString("Pib Err", l_outputStr );
+ }
// Disabled
UdParserPib(const UdParserPib&) = delete;
@@ -92,4 +105,4 @@ namespace SCOM
}
-#endif \ No newline at end of file
+#endif
diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C
index fb766f50d..b52c33ba0 100644
--- a/src/usr/scom/scom.C
+++ b/src/usr/scom/scom.C
@@ -803,7 +803,7 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
//Add some additional FFDC based on the specific operation
if( l_err )
{
- //TODO for P9 RTC 167311 addScomFailFFDC( l_err, i_target, i_addr );
+ addScomFailFFDC( l_err, i_target, i_addr );
}
return l_err;
@@ -813,104 +813,216 @@ errlHndl_t doScomOp(DeviceFW::OperationType i_opType,
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
void addScomFailFFDC( errlHndl_t i_err,
- TARGETING::Target* i_target,
+ TARGETING::Target* i_chipTarg,
uint64_t i_addr )
{
// Read some error regs from scom
- ERRORLOG::ErrlUserDetailsLogRegister l_scom_data(i_target);
+ ERRORLOG::ErrlUserDetailsLogRegister l_scom_data(i_chipTarg);
bool addit = false;
TARGETING::TYPE l_type = TARGETING::TYPE_NA;
- if( i_target == TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL )
+ uint32_t l_badChiplet = 0x00;
+
+ static bool l_insideFFDC = false;
+ if( l_insideFFDC )
+ {
+ TRACDCOMP( g_trac_scom, "Already gathering FFDC..." );
+ return;
+ }
+ l_insideFFDC = true;
+
+ if( i_chipTarg == TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL )
{
l_type = TARGETING::TYPE_PROC;
}
else
{
- l_type = i_target->getAttr<TARGETING::ATTR_TYPE>();
+ l_type = i_chipTarg->getAttr<TARGETING::ATTR_TYPE>();
}
- //PBA scoms on the processor
- if( ((i_addr & 0xFFFFF000) == 0x00064000)
+ //Multicast scoms on the processor
+ if( p9_scom_addr(i_addr).is_multicast()
&& (TARGETING::TYPE_PROC == l_type) )
{
addit = true;
- //look for hung operations on the PBA
- uint64_t ffdc_regs[] = {
- //grab the PBA buffers in case something is hung
- 0x02010850, //PBARBUFVAL0
- 0x02010851, //PBARBUFVAL1
- 0x02010852, //PBARBUFVAL2
- 0x02010858, //PBAWBUFVAL0
- 0x02010859, //PBAWBUFVAL1
+ uint64_t ffdc_regs1[] = {
+ 0x000F001E, // PCBMS.FIRST_ERR_REG
+ 0x000F001F, // PCBMS.ERROR_REG
+ };
+ for( size_t x = 0;
+ x < (sizeof(ffdc_regs1)/sizeof(ffdc_regs1[0]));
+ x++ )
+ {
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(ffdc_regs1[x]));
+ }
- 0x020F0012, //PB_GP3 (has fence information)
+ uint64_t ffdc_regs2[] = {
+ 0x000F0011, // PCBMS.REC_ERR_REG0
+ 0x000F0012, // PCBMS.REC_ERR_REG1
+ 0x000F0013, // PCBMS.REC_ERR_REG2
+ 0x000F0014, // PCBMS.REC_ERR_REG3
};
- for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+
+ // save off the responses to figure out which chiplet failed
+ uint8_t l_responses[(sizeof(ffdc_regs2)/sizeof(ffdc_regs2[0]))
+ *sizeof(uint64_t)];
+ uint8_t* l_respPtr = l_responses;
+
+ for( size_t x = 0;
+ x < (sizeof(ffdc_regs2)/sizeof(ffdc_regs2[0]));
+ x++ )
{
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(ffdc_regs[x]));
+ // going to read these manually because we want to look at the data
+ uint64_t l_scomdata = 0;
+ size_t l_scomsize = sizeof(l_scomdata);
+ errlHndl_t l_ignored = doScomOp( DeviceFW::READ,
+ i_chipTarg,
+ &l_scomdata,
+ l_scomsize,
+ DeviceFW::SCOM,
+ ffdc_regs2[x] );
+ if( l_ignored )
+ {
+ delete l_ignored;
+ l_scomdata = 0;
+ }
+ else
+ {
+ l_scom_data.addDataBuffer( &l_scomdata,
+ l_scomsize,
+ DEVICE_SCOM_ADDRESS(ffdc_regs2[x]) );
+ }
+
+ // copy the error data into our big buffer
+ memcpy( l_respPtr, &l_scomdata, l_scomsize );
+ l_respPtr += l_scomsize; // move to the next chunk
}
- }
- //EX scoms on the processor (not including PCB slave regs)
- else if( ((i_addr & 0xF0000000) == 0x10000000)
- && ((i_addr & 0x00FF0000) != 0x000F0000)
- && (TARGETING::TYPE_PROC == l_type) )
- {
- addit = true;
- uint64_t ex_offset = 0xFF000000 & i_addr;
- //grab some data related to the PCB slave state
- uint64_t ffdc_regs[] = {
- 0x0F010B, //Special Wakeup
- 0x0F0012, //GP3
- 0x0F0100, //PowerManagement GP0
- 0x0F0106, //PFET Status Core
- 0x0F010E, //PFET Status ECO
- 0x0F0111, //PM State History
+
+ // find the bad chiplet
+ // 4-bits per chiplet : 1-bit response, 3-bit error code
+ for( size_t x = 0; x < sizeof(l_responses); x++ )
+ {
+ // look for the first non-zero pib error code
+ if( l_responses[x] & 0x70 ) //front nibble
+ {
+ l_badChiplet = x*2;
+ }
+ else if( l_responses[x] & 0x07 ) //back nibble
+ {
+ l_badChiplet = x*2 + 1;
+ }
+ }
+
+ uint64_t ffdc_regs3[] = {
+ 0x0F0001, // multicast group1
+ 0x0F0002, // multicast group2
+ 0x0F0003, // multicast group3
+ 0x0F0004, // multicast group4
};
- for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ for( size_t x = 0;
+ x < (sizeof(ffdc_regs3)/sizeof(ffdc_regs3[0]));
+ x++ )
{
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(ex_offset|ffdc_regs[x]));
+ p9_scom_addr l_scom(ffdc_regs3[x]);
+ l_scom.set_chiplet_id(l_badChiplet);
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(l_scom.get_addr()));
}
}
//Any non-PCB Slave and non TP reg on the processor
- if( ((i_addr & 0x00FF0000) != 0x000F0000)
- && ((i_addr & 0xFF000000) != 0x00000000)
+ if( ((i_addr & 0x00FF0000) != 0x000F0000) //PCB slave
+ && (p9_scom_addr(i_addr).get_chiplet_id() != 0x00) //TP
&& (TARGETING::TYPE_PROC == l_type) )
{
addit = true;
- uint64_t chiplet_offset = 0xFF000000 & i_addr;
+ if( l_badChiplet == 0x00 )
+ {
+ l_badChiplet = p9_scom_addr(i_addr).get_chiplet_id();
+ }
//grab some data related to the PCB slave state
uint64_t ffdc_regs[] = {
- 0x0F0012, //GP3
- 0x0F001F, //Error capture reg
+ 0x0F001F, // PCBSL<cplt>.ERROR_REG
+ 0x03000F, // CC.<chiplet>.ERROR_STATUS
+ 0x010001, // <chiplet>.PSC.PSCOM_STATUS_ERROR_REG
+ 0x010002, // <chiplet>.PSC.PSCOM_ERROR_MASK
};
for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
{
- l_scom_data.addData( DEVICE_SCOM_ADDRESS(
- chiplet_offset|ffdc_regs[x]) );
+ p9_scom_addr l_scom(ffdc_regs[x]);
+ l_scom.set_chiplet_id(l_badChiplet);
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(l_scom.get_addr()));
}
- //grab the clock/osc regs
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x00050019));
- l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x0005001A));
+ //Osc Switch Sense 1 register
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x0005001D));
+ //Osc Switch Sense 2 register
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(0x0005001E));
//grab the clock regs via FSI too, just in case
- if (i_target != TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL)
+ if (i_chipTarg != TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL)
{
TARGETING::Target* mproc = NULL;
TARGETING::targetService().masterProcChipTargetHandle(mproc);
- if (i_target != mproc)
+ if (i_chipTarg != mproc)
{
- l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2864));//==2819
- l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2868));//==281A
+ l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2874));//==281D
+ l_scom_data.addData(DEVICE_FSI_ADDRESS(0x2878));//==281E
}
}
}
+ //PBA scoms on the processor
+ if( ((i_addr & 0xFFFFF000) == 0x00068000)
+ && (TARGETING::TYPE_PROC == l_type) )
+ {
+ addit = true;
+ //look for hung operations on the PBA
+ uint64_t ffdc_regs[] = {
+ //grab the PBA buffers in case something is hung
+ 0x05012850, //PBARBUFVAL0
+ 0x05012851, //PBARBUFVAL1
+ 0x05012852, //PBARBUFVAL2
+ 0x05012853, //PBARBUFVAL3
+ 0x05012854, //PBARBUFVAL4
+ 0x05012855, //PBARBUFVAL5
+ 0x05012858, //PBAWBUFVAL0
+ 0x05012859, //PBAWBUFVAL1
+ };
+ for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ {
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(ffdc_regs[x]));
+ }
+ }
+ //Core/EX/EQ scoms on the processor (not including PCB slave regs)
+ else if( (((i_addr & 0xF0000000) == 0x10000000) //CACHE
+ || ((i_addr & 0xF0000000) == 0x20000000)) //CORE
+ && ((i_addr & 0x00FF0000) != 0x000F0000) //PCB slave
+ && (TARGETING::TYPE_PROC == l_type) )
+ {
+ addit = true;
+ uint8_t l_badChiplet = p9_scom_addr(i_addr).get_chiplet_id();
+ //grab some data related to the PCB slave state
+ uint64_t ffdc_regs[] = {
+ 0x0F010A, //Special Wakeup Other
+ 0x0F010B, //Special Wakeup FSP
+ 0x0F010C, //Special Wakeup OCC
+ 0x0F010D, //Special Wakeup HYP
+ 0x0F0111, //PM State History FSP
+ };
+ for( size_t x = 0; x < (sizeof(ffdc_regs)/sizeof(ffdc_regs[0])); x++ )
+ {
+ p9_scom_addr l_scom(ffdc_regs[x]);
+ l_scom.set_chiplet_id(l_badChiplet);
+ l_scom_data.addData(DEVICE_SCOM_ADDRESS(l_scom.get_addr()));
+ }
+ }
+
+
if( addit )
{
l_scom_data.addToLog(i_err);
}
+
+ l_insideFFDC = false;
}
diff --git a/src/usr/scom/test/scomtest.H b/src/usr/scom/test/scomtest.H
index 1358a5f4a..eb712665e 100644
--- a/src/usr/scom/test/scomtest.H
+++ b/src/usr/scom/test/scomtest.H
@@ -472,16 +472,16 @@ public:
DEVICE_SCOM_ADDRESS(test_data[x].addr) );
if(!test_data[x].isFail && l_err )
{
- TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScom_proc> [%d] Write: Error from device : addr=0x%X, RC=%X", x, test_data[x].addr, l_err->reasonCode() );
- TS_FAIL( "ScomTest::test_IndirectScom_proc> ERROR : Unexpected error log from device write: addr=0x%X, RC=%X ", test_data[x].addr, l_err->reasonCode() );
+ TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScom> [%d] Write: Error from device : addr=0x%X, RC=%X", x, test_data[x].addr, l_err->reasonCode() );
+ TS_FAIL( "ScomTest::test_IndirectScom> ERROR : Unexpected error log from device write: addr=0x%X, RC=%X ", test_data[x].addr, l_err->reasonCode() );
fails++;
errlCommit(l_err,SCOM_COMP_ID);
l_err = NULL;
}
else if(test_data[x].isFail && !l_err )
{
- TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScom_proc> [%d] Write: Expected an Error from device write: addr=0x%X", x, test_data[x].addr );
- TS_FAIL( "ScomTest::test_IndirectScom_proc> ERROR : Expected an error log from device write and did not get one : addr=0x%X", test_data[x].addr );
+ TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScom> [%d] Write: Expected an Error from device write: addr=0x%X", x, test_data[x].addr );
+ TS_FAIL( "ScomTest::test_IndirectScom> ERROR : Expected an error log from device write and did not get one : addr=0x%X", test_data[x].addr );
fails++;
}
else if(l_err)
@@ -515,16 +515,16 @@ public:
if(!test_data[x].isFail && l_err )
{
- TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScomreadWrite_proc> [%d] Read: Error from device : addr=0x%X, RC=%X", x, test_data[x].addr, l_err->reasonCode() );
- TS_FAIL( "ScomTest::test_IndirectScomreadWrite_proc> ERROR : Unexpected error log from read device : addr=0x%X, RC=%X", test_data[x].addr, l_err->reasonCode() );
+ TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScomreadWrite> [%d] Read: Error from device : addr=0x%X, RC=%X", x, test_data[x].addr, l_err->reasonCode() );
+ TS_FAIL( "ScomTest::test_IndirectScomreadWrite> ERROR : Unexpected error log from read device : addr=0x%X, RC=%X", test_data[x].addr, l_err->reasonCode() );
fails++;
errlCommit(l_err,SCOM_COMP_ID);
l_err = NULL;
}
else if(test_data[x].isFail && !l_err )
{
- TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScom_proc> [%d] Read: Expected an Error from device read : addr=0x%X", x, test_data[x].addr );
- TS_FAIL( "ScomTest::test_IndirectScom_proc> ERROR : Expected an error log from device read and did not get one : addr=0x%X", test_data[x].addr );
+ TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScom> [%d] Read: Expected an Error from device read : addr=0x%X", x, test_data[x].addr );
+ TS_FAIL( "ScomTest::test_IndirectScom> ERROR : Expected an error log from device read and did not get one : addr=0x%X", test_data[x].addr );
fails++;
}
else if(!test_data[x].isFail &&
@@ -532,8 +532,8 @@ public:
(test_data[x].data & 0x000000000000FFFF))
)
{
- TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScomreadWrite_proc> [%d] Read: Data miss-match : addr=0x%X, read_data=0x%llx, write_data=0x%llx", x, test_data[x].addr, read_data[x], test_data[x].data);
- TS_FAIL( "ScomTest::test_IndirectScomreadWrite_proc> ERROR : Data miss-match between read and expected data read_data" );
+ TRACFCOMP(g_trac_scom, "ScomTest::test_IndirectScomreadWrite> [%d] Read: Data miss-match : addr=0x%X, read_data=0x%llx, write_data=0x%llx", x, test_data[x].addr, read_data[x], test_data[x].data);
+ TS_FAIL( "ScomTest::test_IndirectScomreadWrite> ERROR : Data miss-match between read and expected data read_data" );
fails++;
}
else if(l_err)
@@ -544,7 +544,7 @@ public:
}
- TRACFCOMP( g_trac_scom, "ScomTest::test_IndirectScomreadWrite_proc> %d/%d fails", fails, total );
+ TRACFCOMP( g_trac_scom, "ScomTest::test_IndirectScomreadWrite> %d/%d fails", fails, total );
}
/**
@@ -1117,8 +1117,6 @@ public:
TS_FAIL( "ScomTest::test_P9_translate> ERROR : Unexpected error log from write1" );
fails++;
errlCommit(l_err,SCOM_COMP_ID);
-
- delete l_err;
}
else if(l_err == NULL && test_data[i].expectError)
{
@@ -1139,7 +1137,7 @@ public:
else if(l_err && test_data[i].expectError)
{
delete l_err;
- TRACFCOMP(g_trac_scom, "ScomTest::test_P9_translate_scom> Previous error expected");
+ TRACFCOMP(g_trac_scom, "ScomTest::test_P9_translate_scom> Previous error expected");
}
}
else
OpenPOWER on IntegriCloud