summaryrefslogtreecommitdiffstats
path: root/src/usr/diag
diff options
context:
space:
mode:
author: Zane Shelley <zshelle@us.ibm.com> 2018-08-02 11:24:25 -0500
committer: Zane C. Shelley <zshelle@us.ibm.com> 2018-08-06 10:03:37 -0500
commit: d37ee6f5a97b469eec786d79df3612803f87b225 (patch)
tree: c1e997363dbbd3da4c758ac94c39f992a83eec40 /src/usr/diag
parent: c17bbad98d89aa354ac52511751c1327e11603bd (diff)
download: talos-hostboot-d37ee6f5a97b469eec786d79df3612803f87b225.tar.gz
          talos-hostboot-d37ee6f5a97b469eec786d79df3612803f87b225.zip
PRD: getScom() retry for HBRT channel failures
Change-Id: If643b696fc834685da1fc8124b02d90507d5de89
CQ: SW439917
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/63795
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/63885
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
-rw-r--r-- src/usr/diag/prdf/plat/prdfPlatServices.C | 56
1 files changed, 43 insertions, 13 deletions
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C
index c4ddc4a07..ad742e30d 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices.C
@@ -51,6 +51,7 @@
#include <devicefw/userif.H>
#include <iipMopRegisterAccess.h>
#include <ibscomreasoncodes.H>
+#include <scom/scomreasoncodes.H>
#include <p9_proc_gettracearray.H>
#include <fapi2_spd_access.H>
#include <p9c_mss_maint_cmds.H>
@@ -99,27 +100,56 @@ bool isSpConfigFsp()
uint32_t getScom(TARGETING::TargetHandle_t i_target, BitString& io_bs,
uint64_t i_address)
{
- errlHndl_t errl = NULL;
+ errlHndl_t errl = nullptr;
uint32_t rc = SUCCESS;
size_t bsize = (io_bs.getBitLen()+7)/8;
CPU_WORD* buffer = io_bs.getBufAddr();
errl = deviceRead(i_target, buffer, bsize, DEVICE_SCOM_ADDRESS(i_address));
- if(( NULL != errl ) && ( IBSCOM::IBSCOM_BUS_FAILURE == errl->reasonCode() ))
+ if ( nullptr != errl )
{
- PRDF_SET_ERRL_SEV(errl, ERRL_SEV_INFORMATIONAL);
- PRDF_COMMIT_ERRL(errl, ERRL_ACTION_HIDDEN);
- PRDF_INF( "Register access failed with reason code IBSCOM_BUS_FAILURE."
- " Trying again, Target HUID:0x%08X Register 0x%016X Op:%u",
- PlatServices::getHuid( i_target), i_address,
- MopRegisterAccess::READ );
-
- errl = deviceRead(i_target, buffer, bsize,
- DEVICE_SCOM_ADDRESS(i_address));
+ bool doRetry = false;
+
+ #ifdef __HOSTBOOT_RUNTIME
+
+ // We don't have a good mechanism at this time to determine if the SCOM
+ // failed because of a channel failure. So we will just assume any SCOM
+ // error on the Centaur means there is a channel failure and that we
+ // will need to retry.
+ if ( SCOM::SCOM_RUNTIME_HYP_ERR == errl->reasonCode() &&
+ ( (TYPE_MEMBUF == getTargetType(i_target)) ||
+ (TYPE_MBA == getTargetType(i_target)) ) )
+ {
+ doRetry = true;
+ }
+
+ #else
+
+ // An inband SCOM failure likely means the memory channel has failed.
+ // Hostboot will have switched over to FSI SCOMs. So retry.
+ if ( IBSCOM::IBSCOM_BUS_FAILURE == errl->reasonCode() )
+ {
+ doRetry = true;
+ }
+
+ #endif
+
+ if ( doRetry )
+ {
+ PRDF_INF( "deviceRead(0x%08x,0x%016x) failed with reason code "
+ "0x%04x, retrying...", PlatServices::getHuid(i_target),
+ i_address, errl->reasonCode() );
+
+ PRDF_SET_ERRL_SEV( errl, ERRL_SEV_INFORMATIONAL );
+ PRDF_COMMIT_ERRL( errl, ERRL_ACTION_HIDDEN );
+
+ errl = deviceRead( i_target, buffer, bsize,
+ DEVICE_SCOM_ADDRESS(i_address) );
+ }
}
- if( NULL != errl )
+ if ( nullptr != errl )
{
PRDF_ERR( "getScom() failed on i_target=0x%08x i_address=0x%016llx",
getHuid(i_target), i_address );
@@ -137,7 +167,7 @@ uint32_t getScom(TARGETING::TargetHandle_t i_target, BitString& io_bs,
else
{
delete errl;
- errl = NULL;
+ errl = nullptr;
}
}
OpenPOWER on IntegriCloud