summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAmit Tendolkar <amit.tendolkar@in.ibm.com>2017-11-08 12:38:46 -0600
committerSachin Gupta <sgupta2m@in.ibm.com>2017-12-14 13:25:52 -0500
commite97e84d0f782643a6661d18d8cb5f8062144eeb0 (patch)
tree9f9e54f86ef26478dfe2345fe52f64153a22e66f /src
parentc21e72cf477a19a221f84437b4a961f6678fa686 (diff)
downloadtalos-sbe-e97e84d0f782643a6661d18d8cb5f8062144eeb0.tar.gz
talos-sbe-e97e84d0f782643a6661d18d8cb5f8062144eeb0.zip
Enable FFDC Collection for SBE Deadman Timeout
Collects SBE Async FFDC calling p9_collect_deadman_ffdc via the Get SBE FFDC chip-op, based on SBE State of the DMT failure. Misc. changes in the SBE DMT flow. Change-Id: Ie7a0347034cf447613bc206ec6fcfd13b5bc530e RTC: 179364 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/49473 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Shakeeb A. Pasha B K <shakeebbk@in.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Sachin Gupta <sgupta2m@in.ibm.com>
Diffstat (limited to 'src')
-rw-r--r--src/sbefw/sbeFFDC.C24
-rw-r--r--src/sbefw/sbeFFDC.H8
-rw-r--r--src/sbefw/sbecmdcntrldmt.C237
-rw-r--r--src/sbefw/sbecmdcntrldmt.H7
-rw-r--r--src/sbefw/sbecmdgeneric.C10
-rw-r--r--src/sbefw/sbeglobals.H4
-rw-r--r--src/sbefw/sberegaccess.H12
7 files changed, 209 insertions, 93 deletions
diff --git a/src/sbefw/sbeFFDC.C b/src/sbefw/sbeFFDC.C
index 7f1203c3..b47091ed 100644
--- a/src/sbefw/sbeFFDC.C
+++ b/src/sbefw/sbeFFDC.C
@@ -30,6 +30,7 @@
#include "sbeFFDC.H"
#include "sbe_build_info.H"
#include "sbeglobals.H"
+#include "sbecmdcntrldmt.H"
void SbeFFDCPackage::updateUserDataHeader(uint32_t i_fieldsConfig)
{
@@ -51,6 +52,29 @@ void SbeFFDCPackage::updateUserDataHeader(uint32_t i_fieldsConfig)
}
}
+/*
+ * Force-collect HWP FFDC asynchronously to the HWP failure itself.
+ *
+ * The cached SBE_GLOBAL->asyncFfdcRC selects the collector to run. The
+ * three deadman (DMT) flow return codes all route to
+ * sbeCollectDeadmanFfdc(); any other value collects nothing.
+ *
+ * Returns the collector's SBE secondary RC, or
+ * SBE_SEC_OPERATION_SUCCESSFUL when there was nothing to collect.
+ */
+uint32_t SbeFFDCPackage::collectAsyncHwpFfdc (void)
+{
+    #define SBE_FUNC "collectAsyncHwpFfdc"
+    uint32_t l_rc = SBE_SEC_OPERATION_SUCCESSFUL;
+
+    switch (SBE_GLOBAL->asyncFfdcRC)
+    {
+        // Every failure point of the deadman flow shares one collector
+        case fapi2::RC_CHECK_MASTER_STOP15_DEADMAN_TIMEOUT:
+        case fapi2::RC_CHECK_MASTER_STOP15_INVALID_STATE:
+        case fapi2::RC_BLOCK_WAKEUP_INTR_CHECK_FAIL:
+            // SBE_FUNC carries no trailing space, so the message text
+            // supplies the separator; otherwise the function name runs
+            // straight into the message in the trace.
+            SBE_INFO (SBE_FUNC " Collecting DMT Async FFDC for RC 0x%08x",
+                      SBE_GLOBAL->asyncFfdcRC);
+            l_rc = sbeCollectDeadmanFfdc ();
+            break;
+        default:
+            SBE_INFO (SBE_FUNC " No specific Async FFDC to collect");
+            break;
+    }
+
+    return l_rc;
+    #undef SBE_FUNC
+}
+
uint32_t SbeFFDCPackage::sendOverFIFO(const sbeRespGenHdr_t &i_hdr,
const uint32_t i_fieldsConfig,
uint32_t &o_bytesSent,
diff --git a/src/sbefw/sbeFFDC.H b/src/sbefw/sbeFFDC.H
index 1b652f75..75c4d757 100644
--- a/src/sbefw/sbeFFDC.H
+++ b/src/sbefw/sbeFFDC.H
@@ -98,6 +98,14 @@ public:
}
/*
+ * @brief collectAsyncHwpFfdc - method to check and force collect
+ * HWP FFDC to SBE global FFDC region,
+ * asynchronous to the HWP execution
+ *
+ * The collection performed is chosen from the cached
+ * SBE_GLOBAL->asyncFfdcRC; when that value is not one of the
+ * deadman (DMT) flow return codes, nothing is collected.
+ *
+ * @return - SBE secondary RC (SBE_SEC_OPERATION_SUCCESSFUL when
+ * there was nothing to collect)
+ */
+ uint32_t collectAsyncHwpFfdc (void);
+
+ /*
* @brief sendOverFIFO - method to pack and send SBE internal FFDC
* only if isSendInternalFFDCSet() is true
* over FIFO interface
diff --git a/src/sbefw/sbecmdcntrldmt.C b/src/sbefw/sbecmdcntrldmt.C
index 5fc248b3..c1311a34 100644
--- a/src/sbefw/sbecmdcntrldmt.C
+++ b/src/sbefw/sbecmdcntrldmt.C
@@ -40,6 +40,9 @@
#include "fapi2.H"
#include "plat_hw_access.H"
#include "p9_sbe_check_master_stop15.H"
+#ifdef DD2
+#include "p9_collect_deadman_ffdc.H"
+#endif
#include "p9_perv_scom_addresses.H"
#include "p9_block_wakeup_intr.H"
#include "sbeTimerSvc.H"
@@ -50,9 +53,9 @@ using namespace fapi2;
#ifdef SEEPROM_IMAGE
// Using Function pointer to force long call
p9_sbe_check_master_stop15_FP_t p9_sbe_check_master_stop15_hwp =
- &p9_sbe_check_master_stop15;
+ &p9_sbe_check_master_stop15;
p9_block_wakeup_intr_FP_t p9_block_wakeup_intr_hwp =
- &p9_block_wakeup_intr;
+ &p9_block_wakeup_intr;
#endif
////////////////////////////////////////////////////////////////////
@@ -64,7 +67,7 @@ static timerService g_sbe_pk_dmt_timer;
void sbeDmtPkExpiryCallback(void *)
{
#define SBE_FUNC "sbeDmtPkExpiryCallback"
- SBE_INFO(SBE_FUNC" DMT Callback Timer has expired..Checkstop the system ");
+ SBE_INFO (SBE_FUNC "DMT Callback Timer has expired..Checkstop the system");
ReturnCode fapiRc = FAPI2_RC_SUCCESS;
(void)SbeRegAccess::theSbeRegAccess().stateTransition(
@@ -77,12 +80,42 @@ void sbeDmtPkExpiryCallback(void *)
if(fapiRc != FAPI2_RC_SUCCESS)
{
// Scom failed
- SBE_ERROR(SBE_FUNC "PutScom failed for REG PERV_N3_LOCAL_FIR");
+ SBE_ERROR (SBE_FUNC "PutScom failed: REG PERV_N3_LOCAL_FIR");
pk_halt();
}
+
(void)SbeRegAccess::theSbeRegAccess().updateAsyncFFDCBit(true);
- // TODO - Store the response in Async Response
- // RTC:149074
+ #undef SBE_FUNC
+}
+
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+/*
+ * Collect FFDC for a deadman-timer (DMT) flow failure.
+ *
+ * Traces the cached failure reason together with the current and previous
+ * SBE states, builds a core target from SBE_GLOBAL->deadmanCore and, on
+ * DD2 builds only, runs p9_collect_deadman_ffdc against that core.
+ *
+ * Returns SBE_SEC_OPERATION_SUCCESSFUL; the value is never changed after
+ * initialisation.
+ */
+uint32_t sbeCollectDeadmanFfdc (void)
+{
+    #define SBE_FUNC "sbeCollectDeadmanFfdc"
+    uint32_t rc = SBE_SEC_OPERATION_SUCCESSFUL;
+
+    // Trace the saved async FFDC reason and SBE states as info for debug.
+    // SBE_FUNC carries no trailing space, so the message text supplies the
+    // separator between the function name and the message.
+    SBE_INFO (SBE_FUNC " FFDC Reason: 0x%08X States - Curr: %d Prev: %d",
+              SBE_GLOBAL->asyncFfdcRC,
+              SbeRegAccess::theSbeRegAccess().getSbeState(),
+              SbeRegAccess::theSbeRegAccess().getSbePrevState());
+
+    // deadmanCore is a core instance number; the chiplet lookup needs it
+    // offset by CORE_CHIPLET_OFFSET
+    fapi2::Target<fapi2::TARGET_TYPE_CORE> coreTarget (
+        plat_getTargetHandleByChipletNumber <fapi2::TARGET_TYPE_CORE> (
+            (SBE_GLOBAL->deadmanCore + CORE_CHIPLET_OFFSET) ));
+
+#ifdef DD2
+    ReturnCode fapiRc = FAPI2_RC_SUCCESS;
+    // p9_collect_deadman_ffdc collects the required ffdc into the fapi rc
+    // which will be available in the SBE Global HWP FFDC region
+    // NOTE(review): fapiRc is not inspected after the call; presumably the
+    // returned RC only serves as the FFDC carrier - confirm.
+    SBE_EXEC_HWP ( fapiRc,
+                   p9_collect_deadman_ffdc,
+                   coreTarget,
+                   SBE_GLOBAL->asyncFfdcRC );
+#endif
+
+    return rc;
#undef SBE_FUNC
}
@@ -123,9 +156,13 @@ uint32_t sbeStartCntlDmt()
SBE_ERROR(SBE_FUNC" Failed to send response to Hostboot ");
break;
}
+
// Set DMT State
(void)SbeRegAccess::theSbeRegAccess().stateTransition(
SBE_DMT_ENTER_EVENT);
+ // To start, assume no errors will hit when starting DMT and hence
+ // default to potential timeout in stopping DMT for FFDC
+ SBE_GLOBAL->asyncFfdcRC = RC_CHECK_MASTER_STOP15_DEADMAN_TIMEOUT;
Target<TARGET_TYPE_PROC_CHIP > l_procTgt = plat_getChipTarget();
// Fetch the Master EX
@@ -136,99 +173,113 @@ uint32_t sbeStartCntlDmt()
fapi2::Target<fapi2::TARGET_TYPE_EX >
exTgt(plat_getTargetHandleByInstance<fapi2::TARGET_TYPE_EX>(exId));
- // Call Hwp p9_sbe_check_master_stop15 and loop
- // Go around a loop till you get FAPI2_RC_SUCCESS
+ bool hwpFailed = false;
+ // Initialise both cores with fapi2::RC_CHECK_MASTER_STOP15_PENDING
+ uint32_t rcFapi[2] = {RC_CHECK_MASTER_STOP15_PENDING};
+
+ // Call HWP p9_sbe_check_master_stop15 in a loop as long as the timer is
+ // active and HWP returns RC_CHECK_MASTER_STOP15_PENDING
do
{
- //Initilise both core's fapirc with Success, If it's a non-fused
- //mode then only Core0's fapiRC will get modified below, second
- //fapiRc will remain Success
- uint32_t rcFapi[2] = {FAPI2_RC_SUCCESS};
uint8_t coreCnt = 0;
+
for (auto &coreTgt : exTgt.getChildren<fapi2::TARGET_TYPE_CORE>())
{
- // Core0 is assumed to be the master core
- SBE_INFO(SBE_FUNC "Executing p9_sbe_check_master_stop15_hwp for Core[%d]",
- coreTgt.get().getTargetInstance());
-
- SBE_EXEC_HWP(l_fapiRc, p9_sbe_check_master_stop15_hwp, coreTgt);
- rcFapi[coreCnt++] = l_fapiRc;
- if( (l_fapiRc != fapi2::RC_CHECK_MASTER_STOP15_PENDING) &&
- (l_fapiRc != FAPI2_RC_SUCCESS))
- {
- SBE_ERROR(SBE_FUNC" p9_sbe_check_master_stop15 returned "
- "failure for Core[%d]",coreTgt.get().getTargetInstance());
- // Async Response to be stored
- // RTC:149074
- break;
- }
- if(!fuseMode)
+ // Skip calling on core that already entered stop15
+ if (rcFapi[coreCnt] == RC_CHECK_MASTER_STOP15_PENDING)
{
- // This is non-fuse mode, so break here, no need to do the
- // p9_sbe_check_master_stop15_hwp on second core.
- break;
- }
- }
- // Break from do..while(timer.active), if error already happened
- if( (l_fapiRc != fapi2::RC_CHECK_MASTER_STOP15_PENDING) &&
- (l_fapiRc != FAPI2_RC_SUCCESS) )
- {
- break; //do..while(timer.active)
- }
+ SBE_GLOBAL->deadmanCore = coreTgt.get().getTargetInstance();
+ // Core0 is assumed to be the master core
+ SBE_INFO ( SBE_FUNC
+ "Executing p9_sbe_check_master_stop15_hwp for"
+ " Core[%d]", SBE_GLOBAL->deadmanCore );
+ SBE_EXEC_HWP ( l_fapiRc,
+ p9_sbe_check_master_stop15_hwp,
+ coreTgt);
+ rcFapi[coreCnt++] = l_fapiRc;
- // Only for Pending and Success case,
- // If non-fuse core mode then single core status is Pending/Success,
- // if fuse core mode then both core's status is pending/success
-
- if(RC_CHECK_MASTER_STOP15_PENDING != rcFapi[0] &&
- RC_CHECK_MASTER_STOP15_PENDING != rcFapi[1]) // Success
- {
- for (auto coreTgt : exTgt.getChildren<fapi2::TARGET_TYPE_CORE>())
- {
- SBE_INFO(SBE_FUNC "Executing p9_block_wakeup_intr_hwp for Core[%d]",
- coreTgt.get().getTargetInstance());
- SBE_EXEC_HWP(l_fapiRc, p9_block_wakeup_intr_hwp, coreTgt,
- p9pmblockwkup::CLEAR);
- if( l_fapiRc )
+ if (! ((FAPI2_RC_SUCCESS == l_fapiRc) ||
+ (RC_CHECK_MASTER_STOP15_PENDING == l_fapiRc)) )
{
- SBE_ERROR(SBE_FUNC" p9_block_wakeup_intr failed for "
- "Core[%d]",coreTgt.get().getTargetInstance());
- // TODO via RTC 149074
- // Async Response to be stored.
- // Also checkstop the system.
+ hwpFailed = true;
+ // Mark the failure point ..
+ SBE_GLOBAL->asyncFfdcRC =
+ RC_CHECK_MASTER_STOP15_INVALID_STATE;
+ SBE_ERROR ( SBE_FUNC" p9_sbe_check_master_stop15 failed"
+ "on core[%d]", SBE_GLOBAL->deadmanCore );
break;
}
- // If Success for the First core & it's a Fuse core then
- // continue here for the Second core then go on to press the
- // Door Bell
- if(!fuseMode)
- {
+
+ if (!fuseMode)
+ { // mark odd core as succeeded & exit the core loop
+ rcFapi[coreCnt] = FAPI2_RC_SUCCESS;
break;
}
}
-
- // Break out for the p9_block_wakeup_intr failure above
- // Dont press the Door bell
- if(l_fapiRc)
- {
- break;
- }
- // indicate the Host via Bit SBE_SBE2PSU_DOORBELL_SET_BIT2
- // that Stop15 exit
- l_rc = sbeSetSbe2PsuDbBitX(SBE_SBE2PSU_DOORBELL_SET_BIT2);
- if(l_rc)
- {
- SBE_ERROR(SBE_FUNC " Failed to Write "
- "SBE_SBE2PSU_DOORBELL_SET_BIT2");
- }
- break; // Breakout from do..while()
+ } // Core loop for check master stop 15
+
+ // Either Core failed or Both Cores succeeded
+ if ( hwpFailed || ((FAPI2_RC_SUCCESS == rcFapi[0]) &&
+ (FAPI2_RC_SUCCESS == rcFapi[1])))
+ { // Exit timer loop
+ break;
}
- // Stop 15 Pending Case
+
+ // Wait if either or both cores are pending to enter stop 15
+ // and no error on either cores
pk_sleep(PK_MILLISECONDS(SBE_DMT_SLEEP_INTERVAL));
- }while( g_sbe_pk_dmt_timer.isActive()); // Inner Loop
+ // loop back only if timer is still active
+ } while (g_sbe_pk_dmt_timer.isActive());
+
+ if (hwpFailed)
+ { // exit the do .. while (0) outermost loop
+ break;
+ }
+
+ // Both cores entered stop 15 successfully, now unblock interrupts
+ for (auto coreTgt : exTgt.getChildren<fapi2::TARGET_TYPE_CORE>())
+ {
+ SBE_GLOBAL->deadmanCore = coreTgt.get().getTargetInstance();
+ SBE_INFO(SBE_FUNC "Executing p9_block_wakeup_intr_hwp for Core[%d]",
+ SBE_GLOBAL->deadmanCore);
+ SBE_EXEC_HWP(l_fapiRc, p9_block_wakeup_intr_hwp, coreTgt,
+ p9pmblockwkup::CLEAR);
+ if (l_fapiRc)
+ {
+ // Mark the failure point .. SBE waits for DMT timer to expire
+ SBE_GLOBAL->asyncFfdcRC = RC_BLOCK_WAKEUP_INTR_CHECK_FAIL;
+ SBE_ERROR(SBE_FUNC" p9_block_wakeup_intr failed for "
+ "Core[%d]", SBE_GLOBAL->deadmanCore);
+
+ break;
+ }
+ // If Success for the First core & it's a Fuse core then
+ // continue here for the Second core then go on to press the
+ // Door Bell
+ if(!fuseMode)
+ {
+ break;
+ }
+ }
+
+ // Break out for the p9_block_wakeup_intr failure above
+ // Dont press the Door bell
+ if(l_fapiRc)
+ {
+ break;
+ }
- }while(0); // Outer loop
+ // Entered stop15 and unblocked interrupts ..
+ // Indicate the Host via Bit SBE_SBE2PSU_DOORBELL_SET_BIT2
+ // that Stop15 exit
+ l_rc = sbeSetSbe2PsuDbBitX(SBE_SBE2PSU_DOORBELL_SET_BIT2);
+ if(l_rc)
+ {
+ SBE_ERROR(SBE_FUNC " Failed to Write "
+ "SBE_SBE2PSU_DOORBELL_SET_BIT2");
+ }
+ } while(0); // Outer loop
return l_rc;
#undef SBE_FUNC
@@ -244,15 +295,19 @@ uint32_t sbeStopCntlDmt()
do
{
- SBE_INFO(SBE_FUNC "Stop Timer.");
- l_rc = g_sbe_pk_dmt_timer.stopTimer( );
- if(SBE_SEC_OPERATION_SUCCESSFUL != l_rc)
- {
- SBE_GLOBAL->sbeSbe2PsuRespHdr.setStatus(SBE_PRI_INTERNAL_ERROR, l_rc);
- SBE_ERROR(SBE_FUNC"g_sbe_pk_dmt_timer.stopTimer failed");
- l_rc = SBE_SEC_OPERATION_SUCCESSFUL;
- break;
- }
+ SBE_INFO(SBE_FUNC "Stop Timer.");
+ l_rc = g_sbe_pk_dmt_timer.stopTimer( );
+ if(SBE_SEC_OPERATION_SUCCESSFUL != l_rc)
+ {
+ SBE_GLOBAL->sbeSbe2PsuRespHdr.setStatus ( SBE_PRI_INTERNAL_ERROR,
+ l_rc );
+ SBE_ERROR(SBE_FUNC"g_sbe_pk_dmt_timer.stopTimer failed");
+ l_rc = SBE_SEC_OPERATION_SUCCESSFUL;
+ break;
+ }
+
+ // Reset Async FFDC RC to default success
+ SBE_GLOBAL->asyncFfdcRC = FAPI2_RC_SUCCESS;
// Set Runtime State
(void)SbeRegAccess::theSbeRegAccess().stateTransition(
SBE_DMT_COMP_EVENT);
diff --git a/src/sbefw/sbecmdcntrldmt.H b/src/sbefw/sbecmdcntrldmt.H
index a397e153..e83d5b7e 100644
--- a/src/sbefw/sbecmdcntrldmt.H
+++ b/src/sbefw/sbecmdcntrldmt.H
@@ -50,6 +50,13 @@ static const uint64_t N3_FIR_CORE_CHECKSTOP_BIT = 31; // 63-32 = 31
void sbeDmtPkExpiryCallback(void *arg);
/**
+ * @brief Called to collect FFDC for the Deadman loop timeout
+ * into the SBE global HWP FFDC region
+ *
+ * Uses the cached SBE_GLOBAL->asyncFfdcRC and SBE_GLOBAL->deadmanCore
+ * as inputs; the FFDC collection HWP is only invoked on builds
+ * that define DD2.
+ *
+ * @return SBE Secondary RC
+ */
+uint32_t sbeCollectDeadmanFfdc (void);
+
+/**
* @brief Control Deadman Timer command (0xD101)
*
* @param[in] i_pArg Buffer to be passed to the function (not used as of now)
diff --git a/src/sbefw/sbecmdgeneric.C b/src/sbefw/sbecmdgeneric.C
index 1eef3b6d..cbba9caa 100644
--- a/src/sbefw/sbecmdgeneric.C
+++ b/src/sbefw/sbecmdgeneric.C
@@ -167,7 +167,12 @@ uint32_t sbeGetFfdc (uint8_t *i_pArg)
}
SbeFFDCPackage sbeFfdcPack;
- sbeResponseFfdc_t l_ffdc ;
+ sbeResponseFfdc_t l_ffdc;
+
+ // If need be, force collect HWP FFDC async to the real HWP fail.
+ // Else, just send back what the SBE already has.
+ sbeFfdcPack.collectAsyncHwpFfdc ();
+
l_ffdc.setRc(g_FfdcData.fapiRc);
SBE_INFO(SBE_FUNC"FAPI RC is %x", g_FfdcData.fapiRc);
// If no ffdc , exit;
@@ -220,8 +225,9 @@ uint32_t sbeGetFfdc (uint8_t *i_pArg)
{
break;
}
- // If we are able to send ffdc, turn off asny ffdc bit
+ // If we are able to send ffdc, turn off async ffdc bit
(void)SbeRegAccess::theSbeRegAccess().updateAsyncFFDCBit(false);
+ SBE_GLOBAL->asyncFfdcRC = FAPI2_RC_SUCCESS;
}while(0);
diff --git a/src/sbefw/sbeglobals.H b/src/sbefw/sbeglobals.H
index 176d2720..ececd9fd 100644
--- a/src/sbefw/sbeglobals.H
+++ b/src/sbefw/sbeglobals.H
@@ -88,6 +88,10 @@ class SBEGlobalsSingleton
sbeStashMemoryPair_t sbeKeyAddrPair;
// SBE FW security enabled; 0 - disabled; 1 - enabled
uint8_t sbeFWSecurityEnabled;
+ // Instance of Master EX core to be used for DMT FFDC collection
+ uint8_t deadmanCore;
+ // Cached HWP Return Code that hints at what HWP FFDC to collect
+ uint32_t asyncFfdcRC;
// SBE commit id
static uint32_t fwCommitId;
diff --git a/src/sbefw/sberegaccess.H b/src/sbefw/sberegaccess.H
index 49262375..9e47200a 100644
--- a/src/sbefw/sberegaccess.H
+++ b/src/sbefw/sberegaccess.H
@@ -198,6 +198,18 @@ class SbeRegAccess
}
/**
+ * @brief Get the SBE previous State
+ *
+ * Read-only accessor: returns the stored iv_prevState member unchanged.
+ * NOTE(review): presumably iv_prevState is refreshed on each state
+ * transition - confirm against stateTransition().
+ *
+ * @return SBE previous State, sbeState enum (carried in a uint64_t)
+ *
+ */
+ uint64_t getSbePrevState() const
+ {
+ return iv_prevState;
+ }
+
+
+ /**
* @brief Get the SBE major istep number
*
* @return SBE current major istep number
OpenPOWER on IntegriCloud