summary | refs | log | tree | commit | diff | stats
path: root/src/usr/scom
diff options
context:
space:
mode:
author: Dan Crowell <dcrowell@us.ibm.com> 2017-03-16 14:02:36 -0500
committer: Matthew A. Ploetz <maploetz@us.ibm.com> 2017-03-24 11:43:40 -0400
commit: 8dbacd9b1a4bcf81b75edaed5807c3dd956ec46e (patch)
tree: 08a6adfe60e16db4dc77341a0e8a4b397e7cdb08 /src/usr/scom
parent: 6cfd9c7e4dcb22c4d8ce1bf2feb99328ed185781 (diff)
download: talos-hostboot-8dbacd9b1a4bcf81b75edaed5807c3dd956ec46e.tar.gz
talos-hostboot-8dbacd9b1a4bcf81b75edaed5807c3dd956ec46e.zip
Do not fail on Resource Occupied error indirect scom
There is a race in the indirect SCOM status reading where we can catch a 001 (Resource Occupied) PIB status that is not a real error.

Note: the fix was already added in P8, but the forward port was missed.

Change-Id: Ida6fe97fb9a0281640c6cc089482dada5dad1c33
CQ: SW384238
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/38063
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Thi N. Tran <thi@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Reviewed-by: Matthew A. Ploetz <maploetz@us.ibm.com>
Diffstat (limited to 'src/usr/scom')
-rw-r--r-- src/usr/scom/scom.C 49
1 files changed, 38 insertions, 11 deletions
diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C
index b52c33ba0..cca8b45d6 100644
--- a/src/usr/scom/scom.C
+++ b/src/usr/scom/scom.C
@@ -359,11 +359,10 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType,
// or we saw an error, then we're done
if (scomout.done || scomout.piberr)
{
- // we should never see this error code so we are most
- // likely going to fail, but since the hardware team
- // cannot explain why we get this we're going to
- // poll for awhile just in case it could work with
- // a retry
+ // there is a small chance for a race if we check the
+ // status very quickly, the hardware sets the status
+ // to 001=Resource Occupied when the command first
+ // starts so keep polling
if( scomout.piberr != PIB::PIB_RESOURCE_OCCUPIED )
{
break;
@@ -426,6 +425,8 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType,
//Add this target to the FFDC
ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target")
.addToLog(l_err);
+
+ l_err->collectTrace( SCOM_COMP_NAME, 256);
}
// if we got a timeout, create an errorlog.
else if( scomout.done == 0 )
@@ -459,6 +460,8 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType,
//Add this target to the FFDC
ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target")
.addToLog(l_err);
+
+ l_err->collectTrace( SCOM_COMP_NAME, 256);
}
else // It worked
{
@@ -508,7 +511,14 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType,
// or we saw an error, then we're done
if (scomout.done || scomout.piberr)
{
- break;
+ // there is a small chance for a race if we check the
+ // status very quickly, the hardware sets the status
+ // to 001=Resource Occupied when the command first
+ // starts so keep polling
+ if( scomout.piberr != PIB::PIB_RESOURCE_OCCUPIED )
+ {
+ break;
+ }
}
nanosleep( 0, 10000 ); //sleep for 10,000 ns
@@ -541,15 +551,30 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType,
i_addr,
scomout.data64);
- //Add the callouts for the specific PCB/PIB error
- PIB::addFruCallouts( i_target,
- scomout.piberr,
- i_addr,
- l_err );
+ // we should never hit this so if we do we are going
+ // to blame hardware
+ if( scomout.piberr == PIB::PIB_RESOURCE_OCCUPIED )
+ {
+ SCOM::UdPibInfo(scomout.piberr).addToLog(l_err);
+ l_err->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL );
+ }
+ else
+ {
+ //Add the callouts for the specific PCB/PIB error
+ PIB::addFruCallouts( i_target,
+ scomout.piberr,
+ i_addr,
+ l_err );
+ }
//Add this target to the FFDC
ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target")
.addToLog(l_err);
+
+ l_err->collectTrace( SCOM_COMP_NAME, 256);
}
// if we got a timeout, create an errorlog.
else if( scomout.done == 0 )
@@ -583,6 +608,8 @@ errlHndl_t doForm0IndirectScom(DeviceFW::OperationType i_opType,
//Add this target to the FFDC
ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target")
.addToLog(l_err);
+
+ l_err->collectTrace( SCOM_COMP_NAME, 256);
}
} // end of write
} while(0);
OpenPOWER on IntegriCloud