summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Geddes <crgeddes@us.ibm.com>2018-04-05 13:05:31 -0500
committerDaniel M. Crowell <dcrowell@us.ibm.com>2018-04-10 13:46:19 -0400
commit7ee5536ef2a1302676976133c905a3a4c8812c9a (patch)
treeaa2160cc4a467b8420028696e23c4f829e5e93d0
parent885dde53dea48b51d1bdd64774000391d2d962fc (diff)
downloadtalos-hostboot-7ee5536ef2a1302676976133c905a3a4c8812c9a.tar.gz
talos-hostboot-7ee5536ef2a1302676976133c905a3a4c8812c9a.zip
Enable OpenPower SBE Vital Attention handler
If PRD notices that an SBE vital attention is set (TP_LFIR 26), it will call hostboot code to attempt to recover the SBE. If this occurs during IPL time, hostboot will not be able to recover the SBE and we will deconfigure the processor. If this occurs during runtime, HBRT will attempt to run the retry_handler, which results in calling hreset on the SBE that failed. If we are able to recover the SBE, no error will be returned. If we are unable to recover the SBE, we will return an error with a deconfig record. Change-Id: I3da6ec932ef8e59f7b2a184621a47e88d465e0c5 RTC: 167191 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/56821 CI-Ready: Daniel M. Crowell <dcrowell@us.ibm.com> Reviewed-by: Martin Gloff <mgloff@us.ibm.com> Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Roland Veloz <rveloz@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
-rw-r--r--src/include/usr/sbeio/sbe_retry_handler.H20
-rw-r--r--src/include/usr/sbeio/sbeioreasoncodes.H2
-rw-r--r--src/usr/sbeio/common/sbe_attn.C102
-rw-r--r--src/usr/sbeio/common/sbe_retry_handler.C44
-rw-r--r--src/usr/sbeio/runtime/sbeio_vital_attn.C10
-rw-r--r--src/usr/sbeio/sbe_fifodd.C2
-rw-r--r--src/usr/sbeio/sbe_psudd.C2
7 files changed, 133 insertions, 49 deletions
diff --git a/src/include/usr/sbeio/sbe_retry_handler.H b/src/include/usr/sbeio/sbe_retry_handler.H
index ae014d6c5..deef15c32 100644
--- a/src/include/usr/sbeio/sbe_retry_handler.H
+++ b/src/include/usr/sbeio/sbe_retry_handler.H
@@ -36,26 +36,6 @@ namespace SBEIO
class SbeRetryHandler
{
public:
-
- //There are only 2 sides to the seeproms, so we only want to flip sides once
- static constexpr uint8_t MAX_SWITCH_SIDE_COUNT = 1;
- //We only want to attempt to boot with the same side seeprom twice
- static constexpr uint8_t MAX_SIDE_BOOT_ATTEMPTS = 2;
- // Currently we expect a maxiumum of 2 FFDC packets, the one
- // that is useful to HB is the HWP FFDC. It is possible there is
- // a packet that details an internal sbe fail that hostboot will
- // add to an errorlog but otherwise ignores
- static constexpr uint8_t MAX_EXPECTED_FFDC_PACKAGES = 2;
- // action_for_ffdc_rc will figure out what action we should do
- // for each p9_extract_sbe_rc return code. If the RC does not match
- // any return code from p9_extract_sbe_rc then we want to have a
- // known "no action found" value which is defined here
- static constexpr uint32_t NO_ACTION_FOUND_FOR_THIS_RC = 0xFFFF;
-
- static constexpr uint64_t SBE_RETRY_TIMEOUT_HW = 60*NS_PER_SEC; // 60 seconds
- static constexpr uint64_t SBE_RETRY_TIMEOUT_SIMICS = 600*NS_PER_SEC; // 600 seconds
- static constexpr uint32_t SBE_RETRY_NUM_LOOPS = 100;
-
enum SBE_REG_RETURN
{
FAILED_COLLECTING_REG = 0, // Error returned from HWP
diff --git a/src/include/usr/sbeio/sbeioreasoncodes.H b/src/include/usr/sbeio/sbeioreasoncodes.H
index cc93d9004..25822912d 100644
--- a/src/include/usr/sbeio/sbeioreasoncodes.H
+++ b/src/include/usr/sbeio/sbeioreasoncodes.H
@@ -56,7 +56,7 @@ enum sbeioModuleId
SBEIO_HANDLE_SBE_REG_VALUE = 0x0B,
SBEIO_GET_FFDC_HANDLER = 0x0C,
SBEIO_GET_SBE_RC = 0x0D,
- SBEIO_RUNTIME_HANDLE_VITAL_ATTN = 0x0E,
+ SBEIO_HANDLE_VITAL_ATTN = 0x0E,
};
/**
diff --git a/src/usr/sbeio/common/sbe_attn.C b/src/usr/sbeio/common/sbe_attn.C
index 5c151b4eb..2fb0cf1da 100644
--- a/src/usr/sbeio/common/sbe_attn.C
+++ b/src/usr/sbeio/common/sbe_attn.C
@@ -30,6 +30,7 @@
#include <errl/errlentry.H>
#include <errl/errlmanager.H>
#include <errl/errludtarget.H>
+#include <errl/errludcallout.H>
#include <sbeio/sbe_attn.H>
#include <fapi2/target.H>
#include <fapi2/plat_hwp_invoker.H>
@@ -53,44 +54,117 @@ namespace SBEIO
TRACFCOMP( g_trac_sbeio,
ENTER_MRK "handleVitalAttn> i_procTarg=",
TARGETING::get_huid(i_procTarg) );
- errlHndl_t l_errhdl = nullptr;
+ errlHndl_t l_err = nullptr;
- // TODO 167191 Full SBE Belly-Up Handling for OP
#ifdef __HOSTBOOT_RUNTIME
// Inform OPAL, SBE is currently disabled
if (TARGETING::is_sapphire_load())
{
// Inform OPAL of the inoperable SBE
- l_errhdl = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
+ l_err = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
RT_SBEIO::SBE_DISABLED);
- }
-#endif
- // @todo - RTC:180242 - Restart SBE
+ if(l_err)
+ {
+ errlCommit(l_err, SBEIO_COMP_ID);
+ }
+ }
SbeRetryHandler l_sbeObj = SbeRetryHandler(
SbeRetryHandler::SBE_MODE_OF_OPERATION::ATTEMPT_REBOOT);
- //l_sbeObj.main_sbe_handler(i_procTarg);
+ l_sbeObj.main_sbe_handler(i_procTarg);
+ // Check if the SBE made it back to runtime, this tells us if the retry was a
+ // success or not
+ if (!l_sbeObj.isSbeAtRuntime())
+ {
+ /*@
+ * @errortype ERRL_SEV_PREDICTIVE
+ * @moduleid SBEIO_HANDLE_VITAL_ATTN
+ * @reasoncode SBEIO_NO_RECOVERY_ACTION
+ * @userdata1 Huid of processor
+ * @userdata2 Unused
+ * @devdesc PRD detected an error with the SBE and HB failed to
+ * recover
+ * @custdesc Processor Error
+ */
+ l_err = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_PREDICTIVE,
+ SBEIO_HANDLE_VITAL_ATTN,
+ SBEIO_NO_RECOVERY_ACTION,
+ TARGETING::get_huid(i_procTarg),
+ 0);
-#ifdef __HOSTBOOT_RUNTIME
- // Inform OPAL the state of the SBE after a retry
- if (l_sbeObj.isSbeAtRuntime())
+ l_err->addProcedureCallout( HWAS::EPUB_PRC_SUE_PREVERROR,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ l_err->addHwCallout( i_procTarg,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL );
+
+ l_err->collectTrace( SBEIO_COMP_NAME, 256);
+ // @todo - RTC:180244 - Disable the OCC
+ }
+ // Inform OPAL of the SBE state after a successful retry
+ else
{
if (TARGETING::is_sapphire_load())
{
- l_errhdl = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
+ l_err = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
RT_SBEIO::SBE_ENABLED);
+ if(l_err)
+ {
+ errlCommit(l_err, SBEIO_COMP_ID);
+ }
}
+ }
+#else
+ const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2ProcTarget(
+ const_cast<TARGETING::Target*> (i_procTarg));
- // @todo - RTC:180244 - Disable the OCC
+ //Unused in the context, but required for p9_extract_sbe_rc
+ P9_EXTRACT_SBE_RC::RETURN_ACTION l_ret =
+ P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM;
+
+ FAPI_INVOKE_HWP(l_err, p9_extract_sbe_rc,
+ l_fapi2ProcTarget, l_ret);
+
+ // p9_extract_sbe_rc should always return w/ an error
+ // make sure at least some sort of error gets returned
+ // because we are not attempting to recover anything
+ if(!l_err)
+ {
+ /*@
+ * @errortype ERRL_SEV_PREDICTIVE
+ * @moduleid SBEIO_HANDLE_VITAL_ATTN
+ * @reasoncode SBEIO_EXTRACT_RC_ERROR
+ * @userdata1 Huid of processor
+ * @userdata2 Return action from extract_rc
+ * @devdesc We expected an error log to be returned from
+ p9_extract_sbe_rc but there wasn't one
+ * @custdesc Processor Error
+ */
+ l_err = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_PREDICTIVE,
+ SBEIO_HANDLE_VITAL_ATTN,
+ SBEIO_EXTRACT_RC_ERROR,
+ TARGETING::get_huid(i_procTarg),
+ l_ret);
}
+
+ //We want to deconfigure the processor where the error was detected
+ l_err->addHwCallout( i_procTarg,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_NULL );
#endif
+
TRACFCOMP( g_trac_sbeio,
EXIT_MRK "handleVitalAttn> ");
- return l_errhdl;
+ return l_err;
}
-};
+}; \ No newline at end of file
diff --git a/src/usr/sbeio/common/sbe_retry_handler.C b/src/usr/sbeio/common/sbe_retry_handler.C
index f68c587d3..5cfa6f32f 100644
--- a/src/usr/sbeio/common/sbe_retry_handler.C
+++ b/src/usr/sbeio/common/sbe_retry_handler.C
@@ -82,6 +82,32 @@ using namespace ERRORLOG;
namespace SBEIO
{
+// Define constant expressions to be used
+
+//There are only 2 sides to the seeproms, so we only want to flip sides once
+constexpr uint8_t MAX_SWITCH_SIDE_COUNT = 1;
+
+//We only want to attempt to boot with the same side seeprom twice
+constexpr uint8_t MAX_SIDE_BOOT_ATTEMPTS = 2;
+
+// Currently we expect a maximum of 2 FFDC packets, the one
+// that is useful to HB is the HWP FFDC. It is possible there is
+// a packet that details an internal sbe fail that hostboot will
+// add to an errorlog but otherwise ignores
+constexpr uint8_t MAX_EXPECTED_FFDC_PACKAGES = 2;
+
+// action_for_ffdc_rc will figure out what action we should do
+// for each p9_extract_sbe_rc return code. If the RC does not match
+// any return code from p9_extract_sbe_rc then we want to have a
+// known "no action found" value which is defined here
+constexpr uint32_t NO_ACTION_FOUND_FOR_THIS_RC = 0xFFFF;
+
+// Set up constants that will be used for setting up the timeout for
+// reading the sbe message register
+constexpr uint64_t SBE_RETRY_TIMEOUT_HW_SEC = 60; // 60 seconds
+constexpr uint64_t SBE_RETRY_TIMEOUT_SIMICS_SEC = 600; // 600 seconds
+constexpr uint32_t SBE_RETRY_NUM_LOOPS = 60;
+
SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode)
: SbeRetryHandler(i_sbeMode, 0)
{
@@ -121,7 +147,8 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
errlHndl_t l_errl = nullptr;
// Only set the secure debug bit (SDB) if we are not using xscom yet
- if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom)
+ if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom &&
+ !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>())
{
this->iv_useSDB = true;
}
@@ -256,7 +283,7 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
l_errl->collectTrace( SBEIO_COMP_NAME, 256);
l_errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DECONFIG,
+ HWAS::DELAYED_DECONFIG,
HWAS::GARD_NULL );
// Set the PLID of the error log to caller's PLID,
@@ -584,14 +611,15 @@ errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target)
// Each sbe gets 60s to respond with the fact that it's
// booted and at runtime (stable state)
- uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW; // 60 seconds
+ uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW_SEC; // 60 seconds
// Bump this up really high for simics, things are slow there
if( Util::isSimicsRunning() )
{
- l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS; // 600 seconds
+ l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS_SEC; // 600 seconds
}
- const uint64_t SBE_WAIT_SLEEP = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS);
+ //Sleep time should be 1 second on HW, 10 seconds on simics
+ const uint64_t SBE_WAIT_SLEEP_SEC = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS);
SBE_TRACF("Running p9_get_sbe_msg_register HWP on proc target %.8X",
TARGETING::get_huid(i_target));
@@ -646,7 +674,7 @@ errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target)
// reset watchdog before performing the nanosleep
INITSERVICE::sendProgressCode();
#endif
- nanosleep(0,SBE_WAIT_SLEEP);
+ nanosleep(SBE_WAIT_SLEEP_SEC,0);
}
}
@@ -1055,8 +1083,6 @@ errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target)
SBE_TRACF(ENTER_MRK "switch_sbe_sides()");
errlHndl_t l_errl = nullptr;
- TARGETING::ATTR_PROC_SBE_MASTER_CHIP_type l_isMaster =
- i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>();
#ifdef __HOSTBOOT_RUNTIME
const bool l_isRuntime = true;
@@ -1066,7 +1092,7 @@ errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target)
do{
- if(!l_isRuntime && !l_isMaster)
+ if(!l_isRuntime && !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>())
{
const uint32_t l_sbeBootSelectMask = SBE::SBE_BOOT_SELECT_MASK >> 32;
// Read PERV_SB_CS_FSI_BYTE 0x2820 for target proc
diff --git a/src/usr/sbeio/runtime/sbeio_vital_attn.C b/src/usr/sbeio/runtime/sbeio_vital_attn.C
index 8498d2c61..8f8d6bfa8 100644
--- a/src/usr/sbeio/runtime/sbeio_vital_attn.C
+++ b/src/usr/sbeio/runtime/sbeio_vital_attn.C
@@ -52,17 +52,21 @@ namespace RT_SBEIO
/*@
* @errortype
* @severity ERRL_SEV_INFORMATIONAL
- * @moduleid SBEIO_RUNTIME_HANDLE_VITAL_ATTN
+ * @moduleid SBEIO_HANDLE_VITAL_ATTN
* @reasoncode SBEIO_RT_NULL_FIRMWARE_REQUEST_PTR
* @userdata1 HUID of target
* @userdata2 none
* @devdesc Unable to inform OPAL of SBE failure
*/
l_err = new ErrlEntry( ERRL_SEV_INFORMATIONAL,
- SBEIO_RUNTIME_HANDLE_VITAL_ATTN,
+ SBEIO_HANDLE_VITAL_ATTN,
SBEIO_RT_NULL_FIRMWARE_REQUEST_PTR,
get_huid(i_procTarg),
- 0, true);
+ 0, false);
+
+ l_err->addProcedureCallout(HWAS::EPUB_PRC_PHYP_CODE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
break;
}
diff --git a/src/usr/sbeio/sbe_fifodd.C b/src/usr/sbeio/sbe_fifodd.C
index d82f038f2..fabfe1405 100644
--- a/src/usr/sbeio/sbe_fifodd.C
+++ b/src/usr/sbeio/sbe_fifodd.C
@@ -677,7 +677,7 @@ errlHndl_t SbeFifo::waitDnFifoReady(TARGETING::Target * i_target,
{
errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DECONFIG,
+ HWAS::DELAYED_DECONFIG,
HWAS::GARD_NULL );
}
diff --git a/src/usr/sbeio/sbe_psudd.C b/src/usr/sbeio/sbe_psudd.C
index a4916d5c2..9df26744a 100644
--- a/src/usr/sbeio/sbe_psudd.C
+++ b/src/usr/sbeio/sbe_psudd.C
@@ -553,7 +553,7 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target,
{
l_errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DECONFIG,
+ HWAS::DELAYED_DECONFIG,
HWAS::GARD_NULL );
}
OpenPOWER on IntegriCloud