diff options
author | Dan Crowell <dcrowell@us.ibm.com> | 2017-03-16 14:02:36 -0500 |
---|---|---|
committer | Matthew A. Ploetz <maploetz@us.ibm.com> | 2017-03-24 11:43:40 -0400 |
commit | 8dbacd9b1a4bcf81b75edaed5807c3dd956ec46e (patch) | |
tree | 08a6adfe60e16db4dc77341a0e8a4b397e7cdb08 /src/usr/scom | |
parent | 6cfd9c7e4dcb22c4d8ce1bf2feb99328ed185781 (diff) | |
download | talos-hostboot-8dbacd9b1a4bcf81b75edaed5807c3dd956ec46e.tar.gz talos-hostboot-8dbacd9b1a4bcf81b75edaed5807c3dd956ec46e.zip |
Do not fail on Resource Occupied error in indirect scom
There is a race when reading the indirect scom status: if we check it
very soon after the command starts, we can catch a 001 (Resource Occupied) pib status that is not a real error.
Note: this fix was already added for P8, but the forward port was missed.
Change-Id: Ida6fe97fb9a0281640c6cc089482dada5dad1c33
CQ: SW384238
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/38063
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Thi N. Tran <thi@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Reviewed-by: Matthew A. Ploetz <maploetz@us.ibm.com>
Diffstat (limited to 'src/usr/scom')
-rw-r--r-- | src/usr/scom/scom.C | 49 |
1 files changed, 38 insertions, 11 deletions
diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C index b52c33ba0..cca8b45d6 100644 --- a/src/usr/scom/scom.C +++ b/src/usr/scom/scom.C @@ -359,11 +359,10 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType, // or we saw an error, then we're done if (scomout.done || scomout.piberr) { - // we should never see this error code so we are most - // likely going to fail, but since the hardware team - // cannot explain why we get this we're going to - // poll for awhile just in case it could work with - // a retry + // there is a small chance for a race if we check the + // status very quickly, the hardware sets the status + // to 001=Resource Occupied when the command first + // starts so keep polling if( scomout.piberr != PIB::PIB_RESOURCE_OCCUPIED ) { break; @@ -426,6 +425,8 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType, //Add this target to the FFDC ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") .addToLog(l_err); + + l_err->collectTrace( SCOM_COMP_NAME, 256); } // if we got a timeout, create an errorlog. 
else if( scomout.done == 0 ) @@ -459,6 +460,8 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType, //Add this target to the FFDC ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") .addToLog(l_err); + + l_err->collectTrace( SCOM_COMP_NAME, 256); } else // It worked { @@ -508,7 +511,14 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType, // or we saw an error, then we're done if (scomout.done || scomout.piberr) { - break; + // there is a small chance for a race if we check the + // status very quickly, the hardware sets the status + // to 001=Resource Occupied when the command first + // starts so keep polling + if( scomout.piberr != PIB::PIB_RESOURCE_OCCUPIED ) + { + break; + } } nanosleep( 0, 10000 ); //sleep for 10,000 ns @@ -541,15 +551,30 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType, i_addr, scomout.data64); - //Add the callouts for the specific PCB/PIB error - PIB::addFruCallouts( i_target, - scomout.piberr, - i_addr, - l_err ); + // we should never hit this so if we do we are going + // to blame hardware + if( scomout.piberr == PIB::PIB_RESOURCE_OCCUPIED ) + { + SCOM::UdPibInfo(scomout.piberr).addToLog(l_err); + l_err->addHwCallout( i_target, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL ); + } + else + { + //Add the callouts for the specific PCB/PIB error + PIB::addFruCallouts( i_target, + scomout.piberr, + i_addr, + l_err ); + } //Add this target to the FFDC ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") .addToLog(l_err); + + l_err->collectTrace( SCOM_COMP_NAME, 256); } // if we got a timeout, create an errorlog. else if( scomout.done == 0 ) @@ -583,6 +608,8 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType, //Add this target to the FFDC ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") .addToLog(l_err); + + l_err->collectTrace( SCOM_COMP_NAME, 256); } } // end of write } while(0); |