diff options
author | Chris Cain <cjcain@us.ibm.com> | 2015-02-24 16:13:42 -0600 |
---|---|---|
committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2015-02-28 05:53:20 -0600 |
commit | f3d348bde5bd0fbd9a707fd1635bcb3a90d9210d (patch) | |
tree | 52961091c8a645eb99e3adc1d88d59d6687dc162 /src/usr/htmgt/htmgt.C | |
parent | 29581aca6a1ed02d3374e5688e5f32fcb6f104bc (diff) | |
download | talos-hostboot-f3d348bde5bd0fbd9a707fd1635bcb3a90d9210d.tar.gz talos-hostboot-f3d348bde5bd0fbd9a707fd1635bcb3a90d9210d.zip |
Support for OCC error reporting
Change-Id: If8cce2f960b28cda2f039f68e9527df92f9233f2
RTC: 121729
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/15971
Tested-by: Jenkins Server
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Diffstat (limited to 'src/usr/htmgt/htmgt.C')
-rw-r--r-- | src/usr/htmgt/htmgt.C | 124 |
1 files changed, 75 insertions, 49 deletions
diff --git a/src/usr/htmgt/htmgt.C b/src/usr/htmgt/htmgt.C
index 33d4f16c2..aff25008a 100644
--- a/src/usr/htmgt/htmgt.C
+++ b/src/usr/htmgt/htmgt.C
@@ -68,32 +68,27 @@ namespace HTMGT
 if (i_startCompleted)
 {
     // Query functional OCCs
-    const uint8_t numOccs = occMgr::instance().buildOccs();
+    const uint8_t numOccs = OccManager::buildOccs();
     if (numOccs > 0)
     {
-        if (NULL != occMgr::instance().getMasterOcc())
+        if (NULL != OccManager::getMasterOcc())
         {
             do
             {
 #ifndef __HOSTBOOT_RUNTIME
-                if (false == occMgr::instance().iv_configDataBuilt)
+                // Build pstate tables (once per IPL)
+                l_err = genPstateTables();
+                if(l_err)
                 {
-                    // Build pstate tables (once per IPL)
-                    l_err = genPstateTables();
-                    if(l_err)
-                    {
-                        break;
-                    }
-
-                    // Calc memory throttles (once per IPL)
-                    calcMemThrottles();
-
-                    occMgr::instance().iv_configDataBuilt = true;
+                    break;
                 }
+
+                // Calc memory throttles (once per IPL)
+                calcMemThrottles();
 #endif
                 // Make sure OCCs are ready for communication
-                occMgr::instance().waitForOccCheckpoint();
+                OccManager::waitForOccCheckpoint();
 #ifdef __HOSTBOOT_RUNTIME
                 // TODO RTC 124738 Final solution TBD
@@ -137,22 +132,6 @@ namespace HTMGT
             ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
         }
-        // @TODO RTC 120059 remove after elog alerts supported
-#ifdef CONFIG_DELAY_AFTER_OCC_ACTIVATION
-        // Delay to allow the OCC to complete several
-        // sensor readings and create errors if necessary
-        TMGT_INF("Delay after OCC activation");
-        nanosleep(30, 0);
-        // Poll the OCCs to retrieve any errors that may
-        // have been created
-        TMGT_INF("Send final poll to all OCCs");
-        l_err = OccManager::sendOccPoll(true);
-        if (l_err)
-        {
-            ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
-        }
-#endif
-
     } while(0);
 }
 else
@@ -206,13 +185,17 @@ namespace HTMGT
 if (NULL != l_err)
 {
     TMGT_ERR("OCCs not all active. System will stay in safe mode");
-#ifndef __HOSTBOOT_RUNTIME
-    CONSOLE::displayf(HTMGT_COMP_NAME, "OCCs are not active "
-                      "(rc=0x%04X). System will remain in safe mode",
-                      l_err->reasonCode());
-#endif
-    // TODO: RTC 109066
-    //stopAllOccs();
+    TMGT_CONSOLE("OCCs are not active (rc=0x%04X). "
+                 "System will remain in safe mode",
+                 l_err->reasonCode());
+    TMGT_INF("Calling HBOCC::stopAllOCCs");
+    errlHndl_t err2 = HBOCC::stopAllOCCs();
+    if(err2)
+    {
+        TMGT_ERR("stopAllOCCs() failed with 0x%04X",
+                 err2->reasonCode());
+        ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+    }
     // Update error log to unrecoverable and set SRC
     // to indicate the system will remain in safe mode
@@ -244,21 +227,52 @@ namespace HTMGT
 // Notify HTMGT that an OCC has an error to report
-void processOccError(TARGETING::Target * i_proc)
+void processOccError(TARGETING::Target * i_procTarget)
 {
-    const uint32_t l_huid = i_proc->getAttr<TARGETING::ATTR_HUID>();
-    TMGT_INF("processOccError(HUID=0x%08X) called", l_huid);
+    bool polledOneOcc = false;
+    OccManager::buildOccs();
-    //TARGETING::Target * failedOccTarget = NULL;
-    // Get OCC target (one per proc)
-    TARGETING::TargetHandleList pOccs;
-    getChildChiplets(pOccs, i_proc, TARGETING::TYPE_OCC);
-    if (pOccs.size() > 0)
+    if (i_procTarget != NULL)
+    {
+        const uint32_t l_huid =
+            i_procTarget->getAttr<TARGETING::ATTR_HUID>();
+        TMGT_INF("processOccError(HUID=0x%08X) called", l_huid);
+
+        TARGETING::TargetHandleList pOccs;
+        getChildChiplets(pOccs, i_procTarget, TARGETING::TYPE_OCC);
+        if (pOccs.size() > 0)
+        {
+            // Poll specified OCC flushing any errors
+            errlHndl_t err = OccManager::sendOccPoll(true, pOccs[0]);
+            if (err)
+            {
+                ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+            }
+            polledOneOcc = true;
+        }
+    }
+
+    if ((OccManager::getNumOccs() > 1) || (false == polledOneOcc))
     {
-        // failedOccTarget = pOccs[0];
+        // Send POLL command to all OCCs to flush any other errors
+        errlHndl_t err = OccManager::sendOccPoll(true);
+        if (err)
+        {
+            ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+        }
     }
-    // TODO RTC 109224
+    if (OccManager::occNeedsReset())
+    {
+        TMGT_ERR("processOccError(): OCCs need to be reset");
+        // Don't pass failed target as OCC should have already
+        // been marked as failed during the poll.
+        errlHndl_t err = OccManager::resetOccs(NULL);
+        if(err)
+        {
+            ERRORLOG::errlCommit(err, HTMGT_COMP_ID);
+        }
+    }
 } // end processOccError()
@@ -340,13 +354,25 @@ namespace HTMGT
     }
     // Set state for all OCCs
-    errlHndl_t l_err = occMgr::instance().setOccState(targetState);
+    errlHndl_t l_err = OccManager::setOccState(targetState);
     if (NULL == l_err)
     {
         TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
                  targetState);
     }
+    if (OccManager::occNeedsReset())
+    {
+        TMGT_ERR("enableOccActuation(): OCCs need to be reset");
+        // Don't pass failed target as OCC should have already
+        // been marked as failed during the poll.
+        errlHndl_t err2 = OccManager::resetOccs(NULL);
+        if(err2)
+        {
+            ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
+        }
+    }
+
     return l_err;
 } // end enableOccActuation()