summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Doug Gilbert <dgilbert@us.ibm.com> 2015-03-03 16:00:29 -0600
committer A. Patrick Williams III <iawillia@us.ibm.com> 2015-03-10 22:47:21 -0500
commit 2a1d6e44f9dee4b665978be5e0eb0ef47a9e0be0 (patch)
tree f8ecd1df6a7105b9aa6985f51585f65c677f37a8 /src
parent 9f5ce5b51abea9ac2fc04fb231e03d619f942696 (diff)
download talos-hostboot-2a1d6e44f9dee4b665978be5e0eb0ef47a9e0be0.tar.gz
talos-hostboot-2a1d6e44f9dee4b665978be5e0eb0ef47a9e0be0.zip
HTMGT add attempt to reset OCC when OCC Activate fails
Change-Id: I964d2b68216c3ddabae73ce3b851bbc468ec96a7
RTC: 123180
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/16145
Reviewed-by: Christopher Cain <cjcain@us.ibm.com>
Tested-by: Jenkins Server
Reviewed-by: Matt Spinler <spinler@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/include/usr/htmgt/htmgt_reasoncodes.H 1
-rw-r--r-- src/usr/htmgt/htmgt.C 123
-rw-r--r-- src/usr/htmgt/htmgt_activate.C 9
-rw-r--r-- src/usr/htmgt/htmgt_occ.C 34
-rw-r--r-- src/usr/htmgt/htmgt_occ.H 1
5 files changed, 118 insertions, 50 deletions
diff --git a/src/include/usr/htmgt/htmgt_reasoncodes.H b/src/include/usr/htmgt/htmgt_reasoncodes.H
index ade192d13..6fe269de8 100644
--- a/src/include/usr/htmgt/htmgt_reasoncodes.H
+++ b/src/include/usr/htmgt/htmgt_reasoncodes.H
@@ -48,6 +48,7 @@ namespace HTMGT
HTMGT_MOD_CHECK_OCC_RSP = 0x92,
HTMGT_MOD_PARSE_OCC_RSP = 0x94,
HTMGT_MOD_HANLDE_OCC_EXCEPTION = 0xE0,
+ HTMGT_MOD_ENABLE_OCC_ACTUATION = 0xE1,
};
enum htmgtReasonCode
diff --git a/src/usr/htmgt/htmgt.C b/src/usr/htmgt/htmgt.C
index aff25008a..a2f556f94 100644
--- a/src/usr/htmgt/htmgt.C
+++ b/src/usr/htmgt/htmgt.C
@@ -184,42 +184,30 @@ namespace HTMGT
if (NULL != l_err)
{
- TMGT_ERR("OCCs not all active. System will stay in safe mode");
+ TMGT_ERR("OCCs not all active. Attempting OCC Reset");
TMGT_CONSOLE("OCCs are not active (rc=0x%04X). "
- "System will remain in safe mode",
+ "Attempting OCC Reset",
l_err->reasonCode());
- TMGT_INF("Calling HBOCC::stopAllOCCs");
- errlHndl_t err2 = HBOCC::stopAllOCCs();
+ TMGT_INF("Calling resetOccs");
+ errlHndl_t err2 = OccManager::resetOccs(NULL);
if(err2)
{
- TMGT_ERR("stopAllOCCs() failed with 0x%04X",
+ TMGT_ERR("OccManager:;resetOccs failed with 0x%04X",
err2->reasonCode());
- ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
- }
-
- // Update error log to unrecoverable and set SRC
- // to indicate the system will remain in safe mode
- /*@
- * @errortype
- * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
- * @moduleid HTMGT_MOD_LOAD_START_STATUS
- * @userdata1[0:7] load/start completed
- * @devdesc OCCs did not all reach active state,
- * system will be in Safe Mode
- */
- bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS,
- HTMGT_RC_OCC_CRIT_FAILURE,
- i_startCompleted, 0, 0, 1,
- ERRORLOG::ERRL_SEV_UNRECOVERABLE);
- // Add level 2 support callout
- l_err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
- HWAS::SRCI_PRIORITY_MED);
- // Add HB firmware callout
- l_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
- HWAS::SRCI_PRIORITY_MED);
+ // Set original error log as unrecoverable and commit
+ l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
- ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ // Commit occReset error
+ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+ }
+ else
+ {
+ // retry worked - commit original error as informational
+ l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ }
}
} // end processOccStartStatus()
@@ -229,6 +217,19 @@ namespace HTMGT
// Notify HTMGT that an OCC has an error to report
void processOccError(TARGETING::Target * i_procTarget)
{
+ TARGETING::Target* sys = NULL;
+ TARGETING::targetService().getTopLevelTarget(sys);
+ uint8_t safeMode = 0;
+
+ // If the system is in safemode then can't talk to OCCs -
+ // ignore call to processOccError
+ if(sys &&
+ sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
+ safeMode)
+ {
+ return;
+ }
+
bool polledOneOcc = false;
OccManager::buildOccs();
@@ -347,29 +348,57 @@ namespace HTMGT
// Set the OCC state
errlHndl_t enableOccActuation(bool i_occActivation)
{
- occStateId targetState = OCC_STATE_ACTIVE;
- if (false == i_occActivation)
- {
- targetState = OCC_STATE_OBSERVATION;
- }
+ errlHndl_t l_err = NULL;
+ TARGETING::Target* sys = NULL;
+
+ TARGETING::targetService().getTopLevelTarget(sys);
+ uint8_t safeMode = 0;
- // Set state for all OCCs
- errlHndl_t l_err = OccManager::setOccState(targetState);
- if (NULL == l_err)
+ // If the system is in safemode then can't talk to OCCs -
+ // ignore call to enableOccActuation
+ if(sys &&
+ sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
+ safeMode)
{
- TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
- targetState);
+ /*@
+ * @errortype
+ * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
+ * @moduleid HTMGT_MOD_ENABLE_OCC_ACTUATION
+ * @userdata1[0:7] OCC activate [1==true][0==false]
+ * @devdesc Invalid operation when OCCs are in safemode
+ */
+ bldErrLog(l_err,
+ HTMGT_MOD_ENABLE_OCC_ACTUATION,
+ HTMGT_RC_OCC_CRIT_FAILURE,
+ i_occActivation, 0, 0, 1,
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
}
-
- if (OccManager::occNeedsReset())
+ else
{
- TMGT_ERR("enableOccActuation(): OCCs need to be reset");
- // Don't pass failed target as OCC should have already
- // been marked as failed during the poll.
- errlHndl_t err2 = OccManager::resetOccs(NULL);
- if(err2)
+ occStateId targetState = OCC_STATE_ACTIVE;
+ if (false == i_occActivation)
{
- ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+ targetState = OCC_STATE_OBSERVATION;
+ }
+
+ // Set state for all OCCs
+ l_err = OccManager::setOccState(targetState);
+ if (NULL == l_err)
+ {
+ TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
+ targetState);
+ }
+
+ if (OccManager::occNeedsReset())
+ {
+ TMGT_ERR("enableOccActuation(): OCCs need to be reset");
+ // Don't pass failed target as OCC should have already
+ // been marked as failed during the poll.
+ errlHndl_t err2 = OccManager::resetOccs(NULL);
+ if(err2)
+ {
+ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+ }
}
}
diff --git a/src/usr/htmgt/htmgt_activate.C b/src/usr/htmgt/htmgt_activate.C
index 7f54d6dbe..4cb46f033 100644
--- a/src/usr/htmgt/htmgt_activate.C
+++ b/src/usr/htmgt/htmgt_activate.C
@@ -39,6 +39,7 @@
#include <ipmi/ipmisensor.H>
#include <sys/time.h>
+#include <console/consoleif.H>
using namespace TARGETING;
@@ -163,6 +164,14 @@ namespace HTMGT
l_err = occ->ipmiSensor(i_activate);
if( l_err )
{
+ TMGT_ERR("setOccActiveSensors failed. (OCC%d state:%d)",
+ occ->getInstance(),
+ i_activate);
+
+ TMGT_CONSOLE("setOccActiveSensors failed. (OCC%d state:%d)",
+ occ->getInstance(),
+ i_activate);
+
ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
}
}
diff --git a/src/usr/htmgt/htmgt_occ.C b/src/usr/htmgt/htmgt_occ.C
index 8a539f446..6bbc0ae57 100644
--- a/src/usr/htmgt/htmgt_occ.C
+++ b/src/usr/htmgt/htmgt_occ.C
@@ -248,7 +248,8 @@ namespace HTMGT
OccManager::OccManager()
:iv_occMaster(NULL),
iv_state(OCC_STATE_UNKNOWN),
- iv_targetState(OCC_STATE_ACTIVE)
+ iv_targetState(OCC_STATE_ACTIVE),
+ iv_resetCount(0)
{
}
@@ -605,6 +606,21 @@ namespace HTMGT
}
}
+ if(false == _occNeedsReset())
+ {
+ // No occ target needs reset - increment system reset count
+ ++iv_resetCount;
+
+ TMGT_INF("resetOCCs: Incrementing system OCC reset count to %d",
+ iv_resetCount);
+
+ if(iv_resetCount > OCC_RESET_COUNT_THRESHOLD)
+ {
+ atThreshold = true;
+ }
+
+ }
+
uint64_t retryCount = OCC_RESET_COUNT_THRESHOLD;
while(retryCount)
{
@@ -663,7 +679,7 @@ namespace HTMGT
*/
bldErrLog(err,
HTMTG_MOD_OCC_RESET,
- HTMGT_RC_OCC_RESET_THREHOLD,
+ HTMGT_RC_OCC_CRIT_FAILURE,
0, 0, 0, 0,
ERRORLOG::ERRL_SEV_UNRECOVERABLE);
}
@@ -673,6 +689,13 @@ namespace HTMGT
{
err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ // Add level 2 support callout
+ err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
+ HWAS::SRCI_PRIORITY_MED);
+ // Add HB firmware callout
+ err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_MED);
+
TARGETING::Target* sys = NULL;
TARGETING::targetService().getTopLevelTarget(sys);
uint8_t safeMode = 1;
@@ -683,8 +706,13 @@ namespace HTMGT
sys->setAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode);
}
- TMGT_ERR("_resetOccs: Safe Mode RC: 0x%04X (OCC%d)",
+ TMGT_ERR("_resetOccs: Safe Mode (RC: 0x%04X OCC%d)",
cv_safeReturnCode, cv_safeOccInstance);
+
+ TMGT_CONSOLE("OCCs are not active. The system will remain in "
+ "safe mode (RC: 0x%04x for OCC%d)",
+ cv_safeReturnCode,
+ cv_safeOccInstance);
}
return err;
diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H
index dec19b883..5ac545ab7 100644
--- a/src/usr/htmgt/htmgt_occ.H
+++ b/src/usr/htmgt/htmgt_occ.H
@@ -507,6 +507,7 @@ namespace HTMGT
occList_t iv_occArray;
occStateId iv_state;
occStateId iv_targetState;
+ uint8_t iv_resetCount;
/**
* @brief SRC that caused system to enter safe mode
OpenPOWER on IntegriCloud