summaryrefslogtreecommitdiffstats
path: root/src/usr/htmgt/htmgt_occ.C
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/htmgt/htmgt_occ.C')
-rw-r--r--src/usr/htmgt/htmgt_occ.C530
1 files changed, 310 insertions, 220 deletions
diff --git a/src/usr/htmgt/htmgt_occ.C b/src/usr/htmgt/htmgt_occ.C
index 496c7864b..5534a8c58 100644
--- a/src/usr/htmgt/htmgt_occ.C
+++ b/src/usr/htmgt/htmgt_occ.C
@@ -281,8 +281,10 @@ namespace HTMGT
// Query the functional OCCs and build OCC objects
- uint32_t OccManager::_buildOccs()
+ errlHndl_t OccManager::_buildOccs()
{
+ errlHndl_t err = NULL;
+ bool safeModeNeeded = false;
TMGT_INF("buildOccs called");
// Only build OCC objects once.
@@ -290,7 +292,7 @@ namespace HTMGT
{
TMGT_INF("buildOccs: Existing OCC Targets kept = %d",
iv_occArray.size());
- return iv_occArray.size();
+ return err;
}
// Remove existing OCC objects
@@ -338,36 +340,105 @@ namespace HTMGT
}
#endif
- // Get functional OCC (one per proc)
- TARGETING::TargetHandleList pOccs;
- getChildChiplets(pOccs, *proc, TARGETING::TYPE_OCC);
- if (pOccs.size() > 0)
+ if ((NULL != homer) && (NULL != homerPhys))
{
- const unsigned long huid =
- pOccs[0]->getAttr<TARGETING::ATTR_HUID>();
- const bool masterCapable =
- pOccs[0]->getAttr<TARGETING::ATTR_OCC_MASTER_CAPABLE>();
-
- TMGT_INF("Found OCC %d - HUID: 0x%0lX, masterCapable: %c,"
- " homer: 0x%0lX",
- instance, huid, masterCapable?'Y':'N', homer);
- _addOcc(instance, masterCapable, homer, pOccs[0]);
+ // Get functional OCC (one per proc)
+ TARGETING::TargetHandleList pOccs;
+ getChildChiplets(pOccs, *proc, TARGETING::TYPE_OCC);
+ if (pOccs.size() > 0)
+ {
+ const unsigned long huid =
+ pOccs[0]->getAttr<TARGETING::ATTR_HUID>();
+ const bool masterCapable =
+ pOccs[0]->
+ getAttr<TARGETING::ATTR_OCC_MASTER_CAPABLE>();
+
+ TMGT_INF("Found OCC%d - HUID: 0x%0lX, masterCapable:"
+ " %c, homer: 0x%0lX",
+ instance, huid, masterCapable?'Y':'N', homer);
+ _addOcc(instance, masterCapable, homer, pOccs[0]);
+ }
+ else
+ {
+ // OCC must not be functional
+ TMGT_ERR("OCC%d not functional", instance);
+ }
}
else
{
- // OCC must not be functional
- TMGT_ERR("OCC%d not functional", instance);
+ // OCC will not be functional with no HOMER address
+ TMGT_ERR("HOMER address for OCC%d is NULL!", instance);
+ safeModeNeeded = true;
+ if (NULL == err)
+ {
+ /*@
+ * @errortype
+ * @moduleid HTMGT_MOD_BUILD_OCCS
+ * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
+ * @devdesc Homer pointer is NULL, unable to communicate
+ * with the OCCs. Leaving system in safe mode.
+ */
+ bldErrLog(err,
+ HTMGT_MOD_BUILD_OCCS,
+ HTMGT_RC_OCC_CRIT_FAILURE,
+ 0, 0, 0, 0,
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ }
}
- }
+ } // for each processor
}
else
{
TMGT_ERR("No functional processors found");
}
+ if (0 == _getNumOccs())
+ {
+ TMGT_ERR("Unable to find any functional OCCs");
+ if (NULL == err)
+ {
+ /*@
+ * @errortype
+ * @reasoncode HTMGT_RC_OCC_UNAVAILABLE
+ * @moduleid HTMGT_MOD_BUILD_OCCS
+ * @userdata1 functional processor count
+ * @devdesc No functional OCCs were found
+ */
+ bldErrLog(err, HTMGT_MOD_BUILD_OCCS,
+ HTMGT_RC_OCC_UNAVAILABLE,
+ 0, pProcs.size(), 0, 0,
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ }
+ safeModeNeeded = true;
+ }
+
+ if (safeModeNeeded)
+ {
+ // Clear OCC active sensors
+ errlHndl_t err2 = setOccActiveSensors(false);
+ if (err2)
+ {
+ TMGT_ERR("_buildOccs: Set OCC active sensor to false failed.");
+ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+ }
+
+ // Reset all OCCs
+ TMGT_INF("Calling HBOCC::stopAllOCCs");
+ err2 = HBOCC::stopAllOCCs();
+ if (NULL != err2)
+ {
+ TMGT_ERR("_buildOccs: stopAllOCCs failed with rc 0x%04X",
+ err2->reasonCode());
+ err2->collectTrace("HTMGT");
+ ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+ }
+
+ updateForSafeMode(err);
+ }
+
TMGT_INF("buildOccs: OCC Targets found = %d", _getNumOccs());
- return _getNumOccs();
+ return err;
} // end OccManager::_buildOccs()
@@ -433,118 +504,122 @@ namespace HTMGT
// requests a new state, so we can update target here.
iv_targetState = requestedState;
- _buildOccs(); // if not already built.
-
- // Send poll cmd to confirm comm has been established.
- // Flush old errors to ensure any new errors will be collected
- l_err = _sendOccPoll(true, NULL);
- if (l_err)
- {
- TMGT_ERR("_setOccState: Poll OCCs failed.");
- // Proceed with reset even if failed
- ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
- }
-
- if (NULL != iv_occMaster)
+ l_err = _buildOccs(); // if not already built.
+ if (NULL == l_err)
{
- TMGT_INF("_setOccState(state=0x%02X)", requestedState);
+ // Send poll cmd to confirm comm has been established.
+ // Flush old errors to ensure any new errors will be collected
+ l_err = _sendOccPoll(true, NULL);
+ if (l_err)
+ {
+ TMGT_ERR("_setOccState: Poll OCCs failed.");
+ // Proceed with reset even if failed
+ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ }
- const uint8_t occInstance = iv_occMaster->getInstance();
- bool needsRetry = false;
- do
+ if (NULL != iv_occMaster)
{
- l_err = iv_occMaster->setState(requestedState);
- if (NULL == l_err)
- {
- needsRetry = false;
- }
- else
+ TMGT_INF("_setOccState(state=0x%02X)", requestedState);
+
+ const uint8_t occInstance = iv_occMaster->getInstance();
+ bool needsRetry = false;
+ do
{
- TMGT_ERR("_setOccState: Failed to set OCC%d state,"
- " rc=0x%04X",
- occInstance, l_err->reasonCode());
- if (false == needsRetry)
+ l_err = iv_occMaster->setState(requestedState);
+ if (NULL == l_err)
{
- ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
- needsRetry = true;
+ needsRetry = false;
}
else
{
- // Only one retry, return error handle
- needsRetry = false;
+ TMGT_ERR("_setOccState: Failed to set OCC%d state,"
+ " rc=0x%04X",
+ occInstance, l_err->reasonCode());
+ if (false == needsRetry)
+ {
+ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ needsRetry = true;
+ }
+ else
+ {
+ // Only one retry, return error handle
+ needsRetry = false;
+ }
}
}
+ while (needsRetry);
}
- while (needsRetry);
- }
- else
- {
- /*@
- * @errortype
- * @moduleid HTMGT_MOD_OCCMGR_SET_STATE
- * @reasoncode HTMGT_RC_INTERNAL_ERROR
- * @devdesc Unable to set state of master OCC
- */
- bldErrLog(l_err, HTMGT_MOD_OCCMGR_SET_STATE,
- HTMGT_RC_INTERNAL_ERROR,
- 0, 0, 0, 0,
- ERRORLOG::ERRL_SEV_INFORMATIONAL);
- }
-
- if (NULL == l_err)
- {
- // Send poll to query state of all OCCs
- // and flush any errors reported by the OCCs
- l_err = sendOccPoll(true);
- if (l_err)
+ else
{
- TMGT_ERR("_setOccState: Poll all OCCs failed");
- ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
+ /*@
+ * @errortype
+ * @moduleid HTMGT_MOD_OCCMGR_SET_STATE
+ * @reasoncode HTMGT_RC_INTERNAL_ERROR
+ * @devdesc Unable to set state of master OCC
+ */
+ bldErrLog(l_err, HTMGT_MOD_OCCMGR_SET_STATE,
+ HTMGT_RC_INTERNAL_ERROR,
+ 0, 0, 0, 0,
+ ERRORLOG::ERRL_SEV_INFORMATIONAL);
}
- // Make sure all OCCs went to active state
- for (std::vector<Occ*>::iterator pOcc = iv_occArray.begin();
- pOcc < iv_occArray.end();
- pOcc++)
+ if (NULL == l_err)
{
- if (requestedState != (*pOcc)->getState())
+ // Send poll to query state of all OCCs
+ // and flush any errors reported by the OCCs
+ l_err = sendOccPoll(true);
+ if (l_err)
{
- TMGT_ERR("_setOccState: OCC%d is not in 0x%02X state",
- (*pOcc)->getInstance(), requestedState);
- /*@
- * @errortype
- * @moduleid HTMGT_MOD_OCCMGR_SET_STATE
- * @reasoncode HTMGT_RC_OCC_UNEXPECTED_STATE
- * @userdata1[0-15] requested state
- * @userdata1[16-31] OCC state
- * @userdata2[0-15] OCC instance
- * @devdesc OCC did not change to requested state
- */
- bldErrLog(l_err, HTMGT_MOD_OCCMGR_SET_STATE,
- HTMGT_RC_OCC_UNEXPECTED_STATE,
- requestedState, (*pOcc)->getState(),
- (*pOcc)->getInstance(), 0,
- ERRORLOG::ERRL_SEV_INFORMATIONAL);
- break;
+ TMGT_ERR("_setOccState: Poll all OCCs failed");
+ ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
}
- }
- if (NULL == l_err)
- {
- TMGT_INF("_setOccState: All OCCs have reached state 0x%02X",
- requestedState);
-
- if (OCC_STATE_ACTIVE == requestedState)
+ // Make sure all OCCs went to active state
+ for (std::vector<Occ*>::iterator pOcc = iv_occArray.begin();
+ pOcc < iv_occArray.end();
+ pOcc++)
{
- TMGT_CONSOLE("OCCs are now running in ACTIVE state");
+ if (requestedState != (*pOcc)->getState())
+ {
+ TMGT_ERR("_setOccState: OCC%d is not in 0x%02X "
+ "state",
+ (*pOcc)->getInstance(), requestedState);
+ /*@
+ * @errortype
+ * @moduleid HTMGT_MOD_OCCMGR_SET_STATE
+ * @reasoncode HTMGT_RC_OCC_UNEXPECTED_STATE
+ * @userdata1[0-15] requested state
+ * @userdata1[16-31] OCC state
+ * @userdata2[0-15] OCC instance
+ * @devdesc OCC did not change to requested state
+ */
+ bldErrLog(l_err, HTMGT_MOD_OCCMGR_SET_STATE,
+ HTMGT_RC_OCC_UNEXPECTED_STATE,
+ requestedState, (*pOcc)->getState(),
+ (*pOcc)->getInstance(), 0,
+ ERRORLOG::ERRL_SEV_INFORMATIONAL);
+ break;
+ }
}
- else
+
+ if (NULL == l_err)
{
- TMGT_CONSOLE("OCCs are now running in OBSERVATION "
- "state");
+ TMGT_INF("_setOccState: All OCCs have reached state "
+ "0x%02X", requestedState);
+
+ if (OCC_STATE_ACTIVE == requestedState)
+ {
+ TMGT_CONSOLE("OCCs are now running in ACTIVE "
+ "state");
+ }
+ else
+ {
+ TMGT_CONSOLE("OCCs are now running in OBSERVATION "
+ "state");
+ }
}
- }
+ }
}
}
else
@@ -568,155 +643,170 @@ namespace HTMGT
} // end OccManager::_setOccState()
+
errlHndl_t OccManager::_resetOccs(TARGETING::Target * i_failedOccTarget)
{
errlHndl_t err = NULL;
bool atThreshold = false;
- _buildOccs(); // if not a already built.
- err = setOccActiveSensors(false); // Set OCC sensor to inactive
- if( err )
- {
- TMGT_ERR("_resetOccs: Set OCC sensors to inactive failed.");
- // log and continue
- ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
- }
-
- // Send poll cmd to all OCCs to establish comm
- err = _sendOccPoll(false,NULL);
- if (err)
- {
- TMGT_ERR("_resetOccs: Poll OCCs failed.");
- // Proceed with reset even if failed
- ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
- }
-
- for(occList_t::const_iterator occ = iv_occArray.begin();
- occ != iv_occArray.end();
- ++occ)
+ err = _buildOccs(); // if not a already built.
+ if (NULL == err)
{
- if((*occ)->getTarget() == i_failedOccTarget)
+ err = setOccActiveSensors(false); // Set OCC sensor to inactive
+ if( err )
{
- (*occ)->failed(true);
+ TMGT_ERR("_resetOccs: Set OCC sensors to inactive failed.");
+ // log and continue
+ ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
}
- if((*occ)->resetPrep())
+ // Send poll cmd to all OCCs to establish comm
+ err = _sendOccPoll(false,NULL);
+ if (err)
{
- atThreshold = true;
+ TMGT_ERR("_resetOccs: Poll OCCs failed.");
+ // Proceed with reset even if failed
+ ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
}
- }
- if(false == _occNeedsReset())
- {
- // No occ target needs reset - increment system reset count
- ++iv_resetCount;
+ for(occList_t::const_iterator occ = iv_occArray.begin();
+ occ != iv_occArray.end();
+ ++occ)
+ {
+ if((*occ)->getTarget() == i_failedOccTarget)
+ {
+ (*occ)->failed(true);
+ }
- TMGT_INF("resetOCCs: Incrementing system OCC reset count to %d",
- iv_resetCount);
+ if((*occ)->resetPrep())
+ {
+ atThreshold = true;
+ }
+ }
- if(iv_resetCount > OCC_RESET_COUNT_THRESHOLD)
+ if(false == _occNeedsReset())
{
- atThreshold = true;
- }
+ // No occ target needs reset - increment system reset count
+ ++iv_resetCount;
- }
+ TMGT_INF("resetOCCs: Incrementing system OCC reset count to %d",
+ iv_resetCount);
+
+ if(iv_resetCount > OCC_RESET_COUNT_THRESHOLD)
+ {
+ atThreshold = true;
+ }
- uint64_t retryCount = OCC_RESET_COUNT_THRESHOLD;
- while(retryCount)
- {
- // Reset all OCCs
- TMGT_INF("Calling HBOCC::stopAllOCCs");
- err = HBOCC::stopAllOCCs();
- if(!err)
- {
- break;
}
- --retryCount;
- if(retryCount)
+ uint64_t retryCount = OCC_RESET_COUNT_THRESHOLD;
+ while(retryCount)
{
- // log if not last retry
- ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+ // Reset all OCCs
+ TMGT_INF("Calling HBOCC::stopAllOCCs");
+ err = HBOCC::stopAllOCCs();
+ if(!err)
+ {
+ break;
+ }
+ --retryCount;
+
+ if(retryCount)
+ {
+ // log if not last retry
+ ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+ }
+ else
+ {
+ TMGT_ERR("_resetOCCs: stopAllOCCs failed. "
+ "Leaving OCCs in reset state");
+ // pass err handle back
+ err->collectTrace("HTMGT");
+ }
}
- else
+
+ if(!atThreshold && !err)
{
- TMGT_ERR("_resetOCCs: stopAllOCCs failed. "
- "Leaving OCCs in reset state");
- // pass err handle back
- err->collectTrace("HTMGT");
- }
- }
+ for(occList_t::const_iterator occ = iv_occArray.begin();
+ occ != iv_occArray.end();
+ ++occ)
+ {
+ // After OCCs have been reset, clear flags
+ (*occ)->postResetClear();
+ }
- if(!atThreshold && !err)
- {
- for(occList_t::const_iterator occ = iv_occArray.begin();
- occ != iv_occArray.end();
- ++occ)
+ TMGT_INF("Calling HBOCC::activateOCCs");
+ err = HBOCC::activateOCCs();
+ if(err)
+ {
+ TMGT_ERR("_resetOCCs: activateOCCs failed. ");
+ err->collectTrace("HTMGT");
+ }
+ }
+ else if (!err) // Reset Threshold reached and no other err
{
- // After OCCs have been reset, clear flags
- (*occ)->postResetClear();
+ // Create threshold error
+ TMGT_ERR("_resetOCCs: Retry Threshold reached. "
+ "Leaving OCCs in reset state");
+ /*@
+ * @errortype
+ * @moduleid HTMGT_MOD_OCC_RESET
+ * @reasoncode HTMGT_RC_OCC_RESET_THREHOLD
+ * @devdesc OCC reset threshold reached.
+ * Leaving OCCs in reset state
+ */
+ bldErrLog(err,
+ HTMGT_MOD_OCC_RESET,
+ HTMGT_RC_OCC_CRIT_FAILURE,
+ 0, 0, 0, 0,
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE);
}
- TMGT_INF("Calling HBOCC::activateOCCs");
- err = HBOCC::activateOCCs();
+ // Any error at this point means OCCs were not reactivated
if(err)
{
- TMGT_ERR("_resetOCCs: activateOCCs failed. ");
- err->collectTrace("HTMGT");
+ updateForSafeMode(err);
}
}
- else if (!err) // Reset Threshold reached and no other err
- {
- // Create threshold error
- TMGT_ERR("_resetOCCs: Retry Threshold reached. "
- "Leaving OCCs in reset state");
- /*@
- * @errortype
- * @moduleid HTMTG_MOD_OCC_RESET
- * @reasoncode HTMGT_RC_OCC_RESET_THREHOLD
- * @devdesc OCC reset threshold reached.
- * Leaving OCCs in reset state
- */
- bldErrLog(err,
- HTMTG_MOD_OCC_RESET,
- HTMGT_RC_OCC_CRIT_FAILURE,
- 0, 0, 0, 0,
- ERRORLOG::ERRL_SEV_UNRECOVERABLE);
- }
- // Any error at this point means OCCs were not reactivated
- if(err)
- {
- err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+ return err;
- // Add level 2 support callout
- err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
- HWAS::SRCI_PRIORITY_MED);
- // Add HB firmware callout
- err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
- HWAS::SRCI_PRIORITY_MED);
+ } // end OccManager::_resetOccs()
- TARGETING::Target* sys = NULL;
- TARGETING::targetService().getTopLevelTarget(sys);
- uint8_t safeMode = 1;
- // Put into safemode
- if(sys)
- {
- sys->setAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode);
- }
+ void OccManager::updateForSafeMode(errlHndl_t & io_err)
+ {
+ io_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
+
+ // Add level 2 support callout
+ io_err->addProcedureCallout(HWAS::EPUB_PRC_LVL_SUPP,
+ HWAS::SRCI_PRIORITY_MED);
+ // Add HB firmware callout
+ io_err->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
+ HWAS::SRCI_PRIORITY_MED);
- TMGT_ERR("_resetOccs: Safe Mode (RC: 0x%04X OCC%d)",
- cv_safeReturnCode, cv_safeOccInstance);
+ TARGETING::Target* sys = NULL;
+ TARGETING::targetService().getTopLevelTarget(sys);
+ const uint8_t safeMode = 1;
- TMGT_CONSOLE("OCCs are not active. The system will remain in "
- "safe mode (RC: 0x%04x for OCC%d)",
- cv_safeReturnCode,
- cv_safeOccInstance);
+ // Put into safemode
+ if(sys)
+ {
+ sys->setAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode);
}
- return err;
- }
+ _updateSafeModeReason(io_err->reasonCode(), 0);
+
+ TMGT_ERR("updateForSafeMode: Safe Mode (RC: 0x%04X OCC%d)",
+ cv_safeReturnCode, cv_safeOccInstance);
+
+ TMGT_CONSOLE("OCCs are not active. The system will remain in "
+ "safe mode (RC: 0x%04x for OCC%d)",
+ cv_safeReturnCode,
+ cv_safeOccInstance);
+
+ } // end OccManager::updateForSafeMode()
+
// Wait for all OCCs to reach communications checkpoint
void OccManager::_waitForOccCheckpoint()
@@ -842,7 +932,7 @@ namespace HTMGT
}
- uint32_t OccManager::buildOccs()
+ errlHndl_t OccManager::buildOccs()
{
return Singleton<OccManager>::instance()._buildOccs();
}
OpenPOWER on IntegriCloud