diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2017-10-21 13:49:55 -0500 |
---|---|---|
committer | Martha Broyles <mbroyles@us.ibm.com> | 2017-10-24 09:23:00 -0400 |
commit | 20879cb32111afd06cc7db6ea1fd0aaf3ef32ee6 (patch) | |
tree | ebb1694ca2d874fc8ec4a46322cffb41c518ffb1 /src/occ_gpe0 | |
parent | 6404302e1b1eae001edcec4a5c8e4c4ce5cdaaa3 (diff) | |
download | talos-occ-20879cb32111afd06cc7db6ea1fd0aaf3ef32ee6.tar.gz talos-occ-20879cb32111afd06cc7db6ea1fd0aaf3ef32ee6.zip |
FIRDATA: isolation issues due to SCOM errors from 0x50040018
The first change is to reset the SBE FIFO after SCOM failures on
slave processors. This ensures the FIFO is realigned for subsequent
SCOMs.
The second change is to ignore chiplet offline SCOM errors from
0x50040018 (the accompanying scom_util.c change applies the same
exception to 0x50040009) on master processors. These are expected if a
core is asleep at the time of the checkstop. The data returned from the
SCOM will still show active attentions from chiplets that are online.
This commit does not fix chiplet offline SCOM errors from 0x50040018
on slave processors. That will come later in another commit.
Change-Id: Ie60329efed169c9f4afa94154fd86fe466384609
CQ: SW404945
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/48666
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Prachi Gupta <pragupta@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Diffstat (limited to 'src/occ_gpe0')
-rw-r--r-- | src/occ_gpe0/firdata/firData.c | 16 | ||||
-rw-r--r-- | src/occ_gpe0/firdata/sbe_fifo.c | 14 | ||||
-rw-r--r-- | src/occ_gpe0/firdata/scom_util.c | 12 |
3 files changed, 33 insertions, 9 deletions
diff --git a/src/occ_gpe0/firdata/firData.c b/src/occ_gpe0/firdata/firData.c index 6906b2e..78468ce 100644 --- a/src/occ_gpe0/firdata/firData.c +++ b/src/occ_gpe0/firdata/firData.c @@ -83,7 +83,7 @@ typedef struct /* Uncomment for additional debug traces */ -#if 0 +#if 0 #define DEBUG_PRD_CHKSTOP_ANALYSIS #endif @@ -152,10 +152,10 @@ bool FirData_addRegToPnor( FirData_t * io_fd, PNOR_Trgt_t * io_pTrgt, rc = SCOM_getScom( i_sTrgt, i_addr, &(reg.val) ); if ( SUCCESS != rc ) { - TRAC_ERR( "[FirData_addRegToPnor] t=%d p=%d u=%d rc=%d ", - i_sTrgt.type, i_sTrgt.procPos, i_sTrgt.procUnitPos, rc); - TRAC_ERR( "addr=0x%08x val=0x%08x%08x",i_addr, - (uint32_t)(reg.val >> 32), (uint32_t)reg.val ); + TRAC_ERR( "SCOM ERROR: rc=%d t=%u p=%u u=%u", + rc, i_sTrgt.type, i_sTrgt.procPos, i_sTrgt.procUnitPos ); + TRAC_ERR( " addr=0x%08x val=0x%08x%08x", + i_addr, (uint32_t)(reg.val >> 32), (uint32_t)reg.val ); if ( io_pTrgt->scomErrs < PNOR_Trgt_MAX_SCOM_ERRORS ) io_pTrgt->scomErrs++; @@ -175,7 +175,7 @@ bool FirData_addRegToPnor( FirData_t * io_fd, PNOR_Trgt_t * io_pTrgt, TRAC_IMP("addRegToPnor: got scom value, addr=0x%08X value=0x%08X %08X", i_addr, (uint32_t)(reg.val>>32), reg.val); #endif - + full = FirData_addDataToPnor( io_fd, &reg, sizeof(reg) ); if ( full ) break; @@ -768,7 +768,7 @@ bool FirData_addTrgtToPnor( FirData_t * io_fd, SCOM_Trgt_t i_sTrgt, /* had some issue with local var return in diff function so take out */ /* init to zero */ PNOR_Trgt_t tmp_pTrgt; memset( &tmp_pTrgt, 0x00, sizeof(tmp_pTrgt) ); - + tmp_pTrgt.trgtType = i_sTrgt.type; tmp_pTrgt.chipPos = i_sTrgt.procPos, tmp_pTrgt.unitPos = i_sTrgt.procUnitPos; @@ -947,7 +947,7 @@ void FirData_addTrgtsToPnor( FirData_t * io_fd ) TRAC_IMP(" Masks XBUS:%X OBUS:%X ", l_existBits.xbusMask, l_existBits.obusMask); TRAC_IMP(" Masks EC:%X EQ:%X EX:%X", - l_existBits.ecMask, l_existBits.eqMask, l_existBits.exMask); + l_existBits.ecMask, l_existBits.eqMask, l_existBits.exMask); TRAC_IMP(" Masks CAPP:%X PEC:%X 
PHB:%X",l_existBits.cappMask, l_existBits.pecMask, l_existBits.phbMask ); TRAC_IMP(" Masks MCBIST:%X MCS:%X MCA:%X", diff --git a/src/occ_gpe0/firdata/sbe_fifo.c b/src/occ_gpe0/firdata/sbe_fifo.c index 1cddde5..aac8b15 100644 --- a/src/occ_gpe0/firdata/sbe_fifo.c +++ b/src/occ_gpe0/firdata/sbe_fifo.c @@ -386,6 +386,13 @@ int32_t putFifoScom(SCOM_Trgt_t* i_target, uint64_t i_addr, uint64_t i_data) (uint32_t*)&l_fifoResponse, sizeof(struct fifoPutScomResponse)); + if ( l_rc != SUCCESS ) + { + // Reset the FIFO for subsequent SCOMs + uint32_t l_data = 0xDEAD; + putfsi( i_target, 0x2450, l_data ); + } + return l_rc; } @@ -413,6 +420,13 @@ int32_t getFifoScom(SCOM_Trgt_t* i_target, uint64_t i_addr, uint64_t* o_data) (uint32_t*)&l_fifoResponse, sizeof(struct fifoGetScomResponse)); + if ( l_rc != SUCCESS ) + { + // Reset the FIFO for subsequent SCOMs + uint32_t l_data = 0xDEAD; + putfsi( i_target, 0x2450, l_data ); + } + //Always return data even if there is an error *o_data = l_fifoResponse.data; diff --git a/src/occ_gpe0/firdata/scom_util.c b/src/occ_gpe0/firdata/scom_util.c index 3f14acb..ed806cb 100644 --- a/src/occ_gpe0/firdata/scom_util.c +++ b/src/occ_gpe0/firdata/scom_util.c @@ -361,11 +361,21 @@ int32_t SCOM_getScom( SCOM_Trgt_t i_trgt, uint32_t i_addr, uint64_t * o_val ) //Use SBE FIFO if it's a slave proc if(!chip_targ.isMaster) { - return getFifoScom(&chip_targ, trans_addr, o_val); + rc = getFifoScom(&chip_targ, trans_addr, o_val); } else { rc = getscom_abs(trans_addr, o_val); + + // Add exception for chiplet offline errors on the UNIT_CS or + // HOST_ATTN broadcast registers. The value returned will still be + // valid. Even though one or more of the chiplets may have been + // offline. + if ( 2 == rc && + (0x50040018 == i_addr || 0x50040009 == i_addr) ) + { + rc = SUCCESS; + } } } |