summary refs log tree commit diff stats
path: root/src/usr/sbeio
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/sbeio')
-rw-r--r-- src/usr/sbeio/common/sbe_attn.C 102
-rw-r--r-- src/usr/sbeio/common/sbe_retry_handler.C 44
-rw-r--r-- src/usr/sbeio/runtime/sbeio_vital_attn.C 10
-rw-r--r-- src/usr/sbeio/sbe_fifodd.C 2
-rw-r--r-- src/usr/sbeio/sbe_psudd.C 2
5 files changed, 132 insertions, 28 deletions
diff --git a/src/usr/sbeio/common/sbe_attn.C b/src/usr/sbeio/common/sbe_attn.C
index 5c151b4eb..2fb0cf1da 100644
--- a/src/usr/sbeio/common/sbe_attn.C
+++ b/src/usr/sbeio/common/sbe_attn.C
@@ -30,6 +30,7 @@
#include <errl/errlentry.H>
#include <errl/errlmanager.H>
#include <errl/errludtarget.H>
+#include <errl/errludcallout.H>
#include <sbeio/sbe_attn.H>
#include <fapi2/target.H>
#include <fapi2/plat_hwp_invoker.H>
@@ -53,44 +54,117 @@ namespace SBEIO
TRACFCOMP( g_trac_sbeio,
ENTER_MRK "handleVitalAttn> i_procTarg=",
TARGETING::get_huid(i_procTarg) );
- errlHndl_t l_errhdl = nullptr;
+ errlHndl_t l_err = nullptr;
- // TODO 167191 Full SBE Belly-Up Handling for OP
#ifdef __HOSTBOOT_RUNTIME
// Inform OPAL, SBE is currently disabled
if (TARGETING::is_sapphire_load())
{
// Inform OPAL of the inoperable SBE
- l_errhdl = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
+ l_err = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
RT_SBEIO::SBE_DISABLED);
- }
-#endif
- // @todo - RTC:180242 - Restart SBE
+ if(l_err)
+ {
+ errlCommit(l_err, SBEIO_COMP_ID);
+ }
+ }
SbeRetryHandler l_sbeObj = SbeRetryHandler(
SbeRetryHandler::SBE_MODE_OF_OPERATION::ATTEMPT_REBOOT);
- //l_sbeObj.main_sbe_handler(i_procTarg);
+ l_sbeObj.main_sbe_handler(i_procTarg);
+ // Check if the SBE made it back to runtime, this tells us if the retry was a
+ // success or not
+ if (!l_sbeObj.isSbeAtRuntime())
+ {
+ /*@
+ * @errortype ERRL_SEV_PREDICTIVE
+ * @moduleid SBEIO_HANDLE_VITAL_ATTN
+ * @reasoncode SBEIO_NO_RECOVERY_ACTION
+ * @userdata1 Huid of processor
+ * @userdata2 Unused
+ * @devdesc PRD detected an error with the SBE and HB failed to
+ * recover
+ * @custdesc Processor Error
+ */
+ l_err = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_PREDICTIVE,
+ SBEIO_HANDLE_VITAL_ATTN,
+ SBEIO_NO_RECOVERY_ACTION,
+ TARGETING::get_huid(i_procTarg),
+ 0);
-#ifdef __HOSTBOOT_RUNTIME
- // Inform OPAL the state of the SBE after a retry
- if (l_sbeObj.isSbeAtRuntime())
+ l_err->addProcedureCallout( HWAS::EPUB_PRC_SUE_PREVERROR,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ l_err->addHwCallout( i_procTarg,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL );
+
+ l_err->collectTrace( SBEIO_COMP_NAME, 256);
+ // @todo - RTC:180244 - Disable the OCC
+ }
+ // Inform OPAL the state of the SBE after a retry is successful
+ else
{
if (TARGETING::is_sapphire_load())
{
- l_errhdl = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
+ l_err = RT_SBEIO::vital_attn_inform_opal(i_procTarg,
RT_SBEIO::SBE_ENABLED);
+ if(l_err)
+ {
+ errlCommit(l_err, SBEIO_COMP_ID);
+ }
}
+ }
+#else
+ const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2ProcTarget(
+ const_cast<TARGETING::Target*> (i_procTarg));
- // @todo - RTC:180244 - Disable the OCC
+ //Unused in the context, but required for p9_extract_sbe_rc
+ P9_EXTRACT_SBE_RC::RETURN_ACTION l_ret =
+ P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM;
+
+ FAPI_INVOKE_HWP(l_err, p9_extract_sbe_rc,
+ l_fapi2ProcTarget, l_ret);
+
+ // p9_extract_sbe_rc should always return w/ an error
+ // make sure at least some sort of error gets returned
+ // because we are not attempting to recover anything
+ if(!l_err)
+ {
+ /*@
+ * @errortype ERRL_SEV_PREDICTIVE
+ * @moduleid SBEIO_HANDLE_VITAL_ATTN
+ * @reasoncode SBEIO_EXTRACT_RC_ERROR
+ * @userdata1 Huid of processor
+ * @userdata2 Return action from extract_rc
+ * @devdesc We expected an error log to be returned from
+ p9_extract_sbe_rc but there wasn't one
+ * @custdesc Processor Error
+ */
+ l_err = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_PREDICTIVE,
+ SBEIO_HANDLE_VITAL_ATTN,
+ SBEIO_EXTRACT_RC_ERROR,
+ TARGETING::get_huid(i_procTarg),
+ l_ret);
}
+
+ //We want to deconfigure the processor where the error was detected
+ l_err->addHwCallout( i_procTarg,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_NULL );
#endif
+
TRACFCOMP( g_trac_sbeio,
EXIT_MRK "handleVitalAttn> ");
- return l_errhdl;
+ return l_err;
}
-};
+};
\ No newline at end of file
diff --git a/src/usr/sbeio/common/sbe_retry_handler.C b/src/usr/sbeio/common/sbe_retry_handler.C
index f68c587d3..5cfa6f32f 100644
--- a/src/usr/sbeio/common/sbe_retry_handler.C
+++ b/src/usr/sbeio/common/sbe_retry_handler.C
@@ -82,6 +82,32 @@ using namespace ERRORLOG;
namespace SBEIO
{
+// Define constant expressions to be used
+
+//There are only 2 sides to the seeproms, so we only want to flip sides once
+constexpr uint8_t MAX_SWITCH_SIDE_COUNT = 1;
+
+//We only want to attempt to boot with the same side seeprom twice
+constexpr uint8_t MAX_SIDE_BOOT_ATTEMPTS = 2;
+
+// Currently we expect a maximum of 2 FFDC packets, the one
+// that is useful to HB is the HWP FFDC. It is possible there is
+// a packet that details an internal sbe fail that hostboot will
+// add to an errorlog but otherwise ignores
+constexpr uint8_t MAX_EXPECTED_FFDC_PACKAGES = 2;
+
+// action_for_ffdc_rc will figure out what action we should do
+// for each p9_extract_sbe_rc return code. If the RC does not match
+// any return code from p9_extract_sbe_rc then we want to have a
+// known "no action found" value which is defined here
+constexpr uint32_t NO_ACTION_FOUND_FOR_THIS_RC = 0xFFFF;
+
+// Set up constants that will be used for setting up the timeout for
+// reading the sbe message register
+constexpr uint64_t SBE_RETRY_TIMEOUT_HW_SEC = 60; // 60 seconds
+constexpr uint64_t SBE_RETRY_TIMEOUT_SIMICS_SEC = 600; // 600 seconds
+constexpr uint32_t SBE_RETRY_NUM_LOOPS = 60;
+
SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode)
: SbeRetryHandler(i_sbeMode, 0)
{
@@ -121,7 +147,8 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
errlHndl_t l_errl = nullptr;
// Only set the secure debug bit (SDB) if we are not using xscom yet
- if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom)
+ if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom &&
+ !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>())
{
this->iv_useSDB = true;
}
@@ -256,7 +283,7 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
l_errl->collectTrace( SBEIO_COMP_NAME, 256);
l_errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DECONFIG,
+ HWAS::DELAYED_DECONFIG,
HWAS::GARD_NULL );
// Set the PLID of the error log to caller's PLID,
@@ -584,14 +611,15 @@ errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target)
// Each sbe gets 60s to respond with the fact that it's
// booted and at runtime (stable state)
- uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW; // 60 seconds
+ uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW_SEC; // 60 seconds
// Bump this up really high for simics, things are slow there
if( Util::isSimicsRunning() )
{
- l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS; // 600 seconds
+ l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS_SEC; // 600 seconds
}
- const uint64_t SBE_WAIT_SLEEP = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS);
+ //Sleep time should be 1 second on HW, 10 seconds on simics
+ const uint64_t SBE_WAIT_SLEEP_SEC = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS);
SBE_TRACF("Running p9_get_sbe_msg_register HWP on proc target %.8X",
TARGETING::get_huid(i_target));
@@ -646,7 +674,7 @@ errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target)
// reset watchdog before performing the nanosleep
INITSERVICE::sendProgressCode();
#endif
- nanosleep(0,SBE_WAIT_SLEEP);
+ nanosleep(SBE_WAIT_SLEEP_SEC,0);
}
}
@@ -1055,8 +1083,6 @@ errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target)
SBE_TRACF(ENTER_MRK "switch_sbe_sides()");
errlHndl_t l_errl = nullptr;
- TARGETING::ATTR_PROC_SBE_MASTER_CHIP_type l_isMaster =
- i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>();
#ifdef __HOSTBOOT_RUNTIME
const bool l_isRuntime = true;
@@ -1066,7 +1092,7 @@ errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target)
do{
- if(!l_isRuntime && !l_isMaster)
+ if(!l_isRuntime && !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>())
{
const uint32_t l_sbeBootSelectMask = SBE::SBE_BOOT_SELECT_MASK >> 32;
// Read PERV_SB_CS_FSI_BYTE 0x2820 for target proc
diff --git a/src/usr/sbeio/runtime/sbeio_vital_attn.C b/src/usr/sbeio/runtime/sbeio_vital_attn.C
index 8498d2c61..8f8d6bfa8 100644
--- a/src/usr/sbeio/runtime/sbeio_vital_attn.C
+++ b/src/usr/sbeio/runtime/sbeio_vital_attn.C
@@ -52,17 +52,21 @@ namespace RT_SBEIO
/*@
* @errortype
* @severity ERRL_SEV_INFORMATIONAL
- * @moduleid SBEIO_RUNTIME_HANDLE_VITAL_ATTN
+ * @moduleid SBEIO_HANDLE_VITAL_ATTN
* @reasoncode SBEIO_RT_NULL_FIRMWARE_REQUEST_PTR
* @userdata1 HUID of target
* @userdata2 none
* @devdesc Unable to inform OPAL of SBE failure
*/
l_err = new ErrlEntry( ERRL_SEV_INFORMATIONAL,
- SBEIO_RUNTIME_HANDLE_VITAL_ATTN,
+ SBEIO_HANDLE_VITAL_ATTN,
SBEIO_RT_NULL_FIRMWARE_REQUEST_PTR,
get_huid(i_procTarg),
- 0, true);
+ 0, false);
+
+ l_err->addProcedureCallout(HWAS::EPUB_PRC_PHYP_CODE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
break;
}
diff --git a/src/usr/sbeio/sbe_fifodd.C b/src/usr/sbeio/sbe_fifodd.C
index d82f038f2..fabfe1405 100644
--- a/src/usr/sbeio/sbe_fifodd.C
+++ b/src/usr/sbeio/sbe_fifodd.C
@@ -677,7 +677,7 @@ errlHndl_t SbeFifo::waitDnFifoReady(TARGETING::Target * i_target,
{
errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DECONFIG,
+ HWAS::DELAYED_DECONFIG,
HWAS::GARD_NULL );
}
diff --git a/src/usr/sbeio/sbe_psudd.C b/src/usr/sbeio/sbe_psudd.C
index a4916d5c2..9df26744a 100644
--- a/src/usr/sbeio/sbe_psudd.C
+++ b/src/usr/sbeio/sbe_psudd.C
@@ -553,7 +553,7 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target,
{
l_errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DECONFIG,
+ HWAS::DELAYED_DECONFIG,
HWAS::GARD_NULL );
}
OpenPOWER on IntegriCloud