summaryrefslogtreecommitdiffstats
path: root/src/occ_gpe0
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2017-10-21 13:49:55 -0500
committerMartha Broyles <mbroyles@us.ibm.com>2017-10-24 09:23:00 -0400
commit20879cb32111afd06cc7db6ea1fd0aaf3ef32ee6 (patch)
treeebb1694ca2d874fc8ec4a46322cffb41c518ffb1 /src/occ_gpe0
parent6404302e1b1eae001edcec4a5c8e4c4ce5cdaaa3 (diff)
downloadtalos-occ-20879cb32111afd06cc7db6ea1fd0aaf3ef32ee6.tar.gz
talos-occ-20879cb32111afd06cc7db6ea1fd0aaf3ef32ee6.zip
FIRDATA: isolation issues due to SCOM errors from 0x50040018
The first change is to reset the SBE FIFO after SCOM failures on slave processors. This ensures the FIFO is realigned for subsequent SCOMs. The second change is to ignore chiplet offline SCOM errors from 0x50040018 on master processors. These are expected if a core is asleep at the time of the checkstop. The data returned from the SCOM will still show active attentions from chiplets that are online. This commit does not fix chiplet offline SCOM errors from 0x50040018 on slave processors. That will come later in another commit. Change-Id: Ie60329efed169c9f4afa94154fd86fe466384609 CQ: SW404945 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/48666 Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Prachi Gupta <pragupta@us.ibm.com> Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com> Reviewed-by: William A. Bryan <wilbryan@us.ibm.com> Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com> Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Diffstat (limited to 'src/occ_gpe0')
-rw-r--r--src/occ_gpe0/firdata/firData.c16
-rw-r--r--src/occ_gpe0/firdata/sbe_fifo.c14
-rw-r--r--src/occ_gpe0/firdata/scom_util.c12
3 files changed, 33 insertions, 9 deletions
diff --git a/src/occ_gpe0/firdata/firData.c b/src/occ_gpe0/firdata/firData.c
index 6906b2e..78468ce 100644
--- a/src/occ_gpe0/firdata/firData.c
+++ b/src/occ_gpe0/firdata/firData.c
@@ -83,7 +83,7 @@ typedef struct
/* Uncomment for additional debug traces */
-#if 0
+#if 0
#define DEBUG_PRD_CHKSTOP_ANALYSIS
#endif
@@ -152,10 +152,10 @@ bool FirData_addRegToPnor( FirData_t * io_fd, PNOR_Trgt_t * io_pTrgt,
rc = SCOM_getScom( i_sTrgt, i_addr, &(reg.val) );
if ( SUCCESS != rc )
{
- TRAC_ERR( "[FirData_addRegToPnor] t=%d p=%d u=%d rc=%d ",
- i_sTrgt.type, i_sTrgt.procPos, i_sTrgt.procUnitPos, rc);
- TRAC_ERR( "addr=0x%08x val=0x%08x%08x",i_addr,
- (uint32_t)(reg.val >> 32), (uint32_t)reg.val );
+ TRAC_ERR( "SCOM ERROR: rc=%d t=%u p=%u u=%u",
+ rc, i_sTrgt.type, i_sTrgt.procPos, i_sTrgt.procUnitPos );
+ TRAC_ERR( " addr=0x%08x val=0x%08x%08x",
+ i_addr, (uint32_t)(reg.val >> 32), (uint32_t)reg.val );
if ( io_pTrgt->scomErrs < PNOR_Trgt_MAX_SCOM_ERRORS )
io_pTrgt->scomErrs++;
@@ -175,7 +175,7 @@ bool FirData_addRegToPnor( FirData_t * io_fd, PNOR_Trgt_t * io_pTrgt,
TRAC_IMP("addRegToPnor: got scom value, addr=0x%08X value=0x%08X %08X",
i_addr, (uint32_t)(reg.val>>32), reg.val);
#endif
-
+
full = FirData_addDataToPnor( io_fd, &reg, sizeof(reg) );
if ( full ) break;
@@ -768,7 +768,7 @@ bool FirData_addTrgtToPnor( FirData_t * io_fd, SCOM_Trgt_t i_sTrgt,
/* had some issue with local var return in diff function so take out */
/* init to zero */
PNOR_Trgt_t tmp_pTrgt; memset( &tmp_pTrgt, 0x00, sizeof(tmp_pTrgt) );
-
+
tmp_pTrgt.trgtType = i_sTrgt.type;
tmp_pTrgt.chipPos = i_sTrgt.procPos,
tmp_pTrgt.unitPos = i_sTrgt.procUnitPos;
@@ -947,7 +947,7 @@ void FirData_addTrgtsToPnor( FirData_t * io_fd )
TRAC_IMP(" Masks XBUS:%X OBUS:%X ",
l_existBits.xbusMask, l_existBits.obusMask);
TRAC_IMP(" Masks EC:%X EQ:%X EX:%X",
- l_existBits.ecMask, l_existBits.eqMask, l_existBits.exMask);
+ l_existBits.ecMask, l_existBits.eqMask, l_existBits.exMask);
TRAC_IMP(" Masks CAPP:%X PEC:%X PHB:%X",l_existBits.cappMask,
l_existBits.pecMask, l_existBits.phbMask );
TRAC_IMP(" Masks MCBIST:%X MCS:%X MCA:%X",
diff --git a/src/occ_gpe0/firdata/sbe_fifo.c b/src/occ_gpe0/firdata/sbe_fifo.c
index 1cddde5..aac8b15 100644
--- a/src/occ_gpe0/firdata/sbe_fifo.c
+++ b/src/occ_gpe0/firdata/sbe_fifo.c
@@ -386,6 +386,13 @@ int32_t putFifoScom(SCOM_Trgt_t* i_target, uint64_t i_addr, uint64_t i_data)
(uint32_t*)&l_fifoResponse,
sizeof(struct fifoPutScomResponse));
+ if ( l_rc != SUCCESS )
+ {
+ // Reset the FIFO for subsequent SCOMs
+ uint32_t l_data = 0xDEAD;
+ putfsi( i_target, 0x2450, l_data );
+ }
+
return l_rc;
}
@@ -413,6 +420,13 @@ int32_t getFifoScom(SCOM_Trgt_t* i_target, uint64_t i_addr, uint64_t* o_data)
(uint32_t*)&l_fifoResponse,
sizeof(struct fifoGetScomResponse));
+ if ( l_rc != SUCCESS )
+ {
+ // Reset the FIFO for subsequent SCOMs
+ uint32_t l_data = 0xDEAD;
+ putfsi( i_target, 0x2450, l_data );
+ }
+
//Always return data even if there is an error
*o_data = l_fifoResponse.data;
diff --git a/src/occ_gpe0/firdata/scom_util.c b/src/occ_gpe0/firdata/scom_util.c
index 3f14acb..ed806cb 100644
--- a/src/occ_gpe0/firdata/scom_util.c
+++ b/src/occ_gpe0/firdata/scom_util.c
@@ -361,11 +361,21 @@ int32_t SCOM_getScom( SCOM_Trgt_t i_trgt, uint32_t i_addr, uint64_t * o_val )
//Use SBE FIFO if it's a slave proc
if(!chip_targ.isMaster)
{
- return getFifoScom(&chip_targ, trans_addr, o_val);
+ rc = getFifoScom(&chip_targ, trans_addr, o_val);
}
else
{
rc = getscom_abs(trans_addr, o_val);
+
+ // Add exception for chiplet offline errors on the UNIT_CS or
+ // HOST_ATTN broadcast registers. The value returned will still be
+ // valid, even though one or more of the chiplets may have been
+ // offline.
+ if ( 2 == rc &&
+ (0x50040018 == i_addr || 0x50040009 == i_addr) )
+ {
+ rc = SUCCESS;
+ }
}
}
OpenPOWER on IntegriCloud