/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* $Source: src/usr/diag/mdia/mdiasm.C $                                  */
/*                                                                        */
/* OpenPOWER HostBoot Project                                             */
/*                                                                        */
/* Contributors Listed Below - COPYRIGHT 2012,2017                        */
/* [+] International Business Machines Corp.                              */
/*                                                                        */
/*                                                                        */
/* Licensed under the Apache License, Version 2.0 (the "License");        */
/* you may not use this file except in compliance with the License.       */
/* You may obtain a copy of the License at                                */
/*                                                                        */
/*     http://www.apache.org/licenses/LICENSE-2.0                         */
/*                                                                        */
/* Unless required by applicable law or agreed to in writing, software    */
/* distributed under the License is distributed on an "AS IS" BASIS,      */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
/* implied. See the License for the specific language governing           */
/* permissions and limitations under the License.                         */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */
/**
 * @file mdiasm.C
 * @brief mdia state machine implementation
 */

#include "mdiasm.H"
#include "mdiasmimpl.H"
#include "mdiatrace.H"
#include "mdiaworkitem.H"
#include "mdiamonitor.H"
// NOTE(review): twelve further #include directives follow here.  Their
// header names were not visible in the text this file was reviewed from
// (everything between angle brackets had been stripped), so they are kept
// as placeholders.  Restore the original header names from version
// control; do not guess them.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

using namespace TARGETING;
using namespace ERRORLOG;
using namespace std;
using namespace DeviceFW;

namespace MDIA
{

namespace
{

/**
 * @brief Attach one register user-details section per SCOM address to a log
 *
 * @param[in]     i_target target to read from; a null target is skipped
 * @param[in]     i_regs   SCOM addresses to capture
 * @param[in,out] io_log   error log receiving the register sections
 */
template <size_t N>
void addRegistersToLog(ConstTargetHandle_t i_target,
                       const uint64_t (&i_regs)[N],
                       errlHndl_t & io_log)
{
    if(!i_target)
    {
        return;
    }

    for(const uint64_t reg : i_regs)
    {
        ErrlUserDetailsLogRegister udLogRegister(
                i_target, DEVICE_SCOM_ADDRESS(reg));
        udLogRegister.addToLog(io_log);
    }
}

} // anonymous namespace

/**
 * @brief Report whether the state machine is still running
 *
 * @param[out] o_running true unless the run is done or shutdown was requested
 */
void StateMachine::running(bool & o_running)
{
    mutex_lock(&iv_mutex);
    o_running = !(iv_done || iv_shutdown);
    mutex_unlock(&iv_mutex);
}

/**
 * @brief Add timeout FFDC (register captures and traces) to an error log
 *
 * For an MBA target the MBA, its parent membuf, the parent MCS and the
 * parent proc are captured.  For an MCBIST target the MCBIST and its parent
 * proc are captured.  Any other target type gets traces only.
 *
 * @param[in]     i_target target the maint command was running on
 * @param[in,out] io_log   error log to add the FFDC to; nothing is done if
 *                         either argument is null
 */
void addTimeoutFFDC(TargetHandle_t i_target, errlHndl_t & io_log)
{
    static const uint64_t mbaRegs[] = {
        MBA01_SPA,
        MBA01_SPA_MASK,
        MBA01_CMD_TYPE,
        MBA01_CMD_CONTROL,
        MBA01_CMD_STATUS,
        MBA01_MBMACAQ,
        MBA01_MBMEA,
    };

    static const uint64_t membufRegs[] = {
        MEM_SPA_FIR,
        MEM_SPA_FIR_MASK,
    };

    static const uint64_t mcsRegs[] = {
        MCI_FIR,
        MCI_FIR_MASK,
        MCI_FIR_ACT0,
        MCI_FIR_ACT1,
        MCS_MODE4,
    };

    static const uint64_t mcbRegs[] = {
        MCBIST_FIR,
        MCBIST_FIR_MASK,
        MCBIST_FIR_ACT0,
        MCBIST_FIR_ACT1,
    };

    static const uint64_t procRegs[] = {
        IPOLL_MASK,
        IPOLL_STATUS,
        GLOBAL_CS_FIR,
        GLOBAL_RE_FIR,
        GLOBAL_UCS_FIR,
        GLOBAL_HA_FIR,
        MC0_CHIPLET_HA_FIR,
        MC0_CHIPLET_HA_FIR_MASK,
        MC1_CHIPLET_HA_FIR,
        MC1_CHIPLET_HA_FIR_MASK,
    };

    // FFDC collection runs on an error path; never fault here
    if(!i_target || !io_log)
    {
        return;
    }

    const TYPE trgtType = i_target->getAttr<ATTR_TYPE>();

    if(TYPE_MBA == trgtType)
    {
        // get the parent membuf
        ConstTargetHandle_t membuf = getParentChip(i_target);

        // get the parent mcs (only trusted when exactly one is found)
        TargetHandleList targetList;
        TargetHandle_t mcs = NULL;

        if(membuf)
        {
            getParentAffinityTargets(
                    targetList, membuf, CLASS_UNIT, TYPE_MCS);
        }

        if(targetList.size() == 1)
        {
            mcs = targetList[0];
        }

        // get the parent proc
        ConstTargetHandle_t proc = NULL;

        if(mcs)
        {
            proc = getParentChip(mcs);
        }

        addRegistersToLog(i_target, mbaRegs, io_log);
        addRegistersToLog(membuf, membufRegs, io_log);
        addRegistersToLog(mcs, mcsRegs, io_log);
        addRegistersToLog(proc, procRegs, io_log);
    }
    else if(TYPE_MCBIST == trgtType)
    {
        // get the parent proc
        ConstTargetHandle_t proc = getParentChip(i_target);

        addRegistersToLog(i_target, mcbRegs, io_log);
        addRegistersToLog(proc, procRegs, io_log);
    }

    // collect these traces for timeout debugging
    io_log->collectTrace("MDIA_FAST",512);
    io_log->collectTrace(PRDF_COMP_NAME,512);
    io_log->collectTrace(FAPI_TRACE_NAME,512);
    io_log->collectTrace(FAPI_IMP_TRACE_NAME,512);
}

/**
 * @brief Select the fapi2 target type memory diagnostics operate on
 *
 * The model of the master proc decides: Cumulus uses
 * TARGET_TYPE_MBA_CHIPLET, anything else (Nimbus) uses TARGET_TYPE_MCBIST.
 *
 * @return fapi2 target type; TARGET_TYPE_MCBIST if no master proc is found
 */
fapi2::TargetType getMdiaTargetType()
{
    TARGETING::Target* masterProc = nullptr;
    TARGETING::targetService().masterProcChipTargetHandle(masterProc);

    if(nullptr == masterProc)
    {
        // previously dereferenced unconditionally; take the non-Cumulus
        // default instead of faulting
        MDIA_ERR("getMdiaTargetType: master proc not found, "
                 "defaulting to MCBIST");
        return fapi2::TARGET_TYPE_MCBIST;
    }

    if(TARGETING::MODEL_CUMULUS == masterProc->getAttr<ATTR_MODEL>())
    {
        return fapi2::TARGET_TYPE_MBA_CHIPLET;
    }

    return fapi2::TARGET_TYPE_MCBIST;
}

/**
 * @brief Do the setup for CE thresholds
 *
 * Read-modify-writes the MBSTR register of the parent membuf; the register
 * is chosen by the chip unit of the MBA (unit 0 uses MEM_MBA0_MBSTR,
 * otherwise MEM_MBA1_MBSTR).
 *
 * @param[in] i_mba MBA target
 *
 * @return nullptr on success, else the error log from the failing SCOM
 *         access
 */
errlHndl_t ceErrorSetup( TargetHandle_t i_mba )
{
    // get the parent membuf
    TargetHandle_t membuf =
        const_cast<TargetHandle_t>(getParentChip(i_mba));

    uint64_t addr = ( 0 == i_mba->getAttr<ATTR_CHIP_UNIT>() )
                        ? MEM_MBA0_MBSTR
                        : MEM_MBA1_MBSTR;
    uint64_t data = 0;
    size_t sz_data = sizeof(data);

    errlHndl_t err = deviceRead( membuf, &data, sz_data,
                                 DEVICE_SCOM_ADDRESS(addr) );
    if( nullptr != err )
    {
        MDIA_FAST("ceErrorSetup: deviceRead on 0x%08X failed HUID:0x%08X",
                  addr, get_huid(membuf));
        return err;
    }

    // set 0-3 bits to Enable soft, intermittent, hard and Retry CE
    // threshold attention
    // set error threshold to 1 ( set 15,27,39,51 bits ).
    // Enable per-symbol error counters to count soft, intermittent
    // and hard CEs ( set 55, 56, 57 bits ).
    // First clear starting 52 bits and then set relevant bits.
    data = ( data & 0x0000000000000fff ) | 0xf0010010010011c0;

    err = deviceWrite( membuf, &data, sz_data,
                       DEVICE_SCOM_ADDRESS(addr) );
    if( nullptr != err )
    {
        // the write targets the membuf, so report the membuf HUID
        // (matches the read-failure trace above)
        MDIA_FAST("ceErrorSetup: deviceWrite on 0x%08X failed HUID:0x%08X",
                  addr, get_huid(membuf));
    }

    return err;
}

/**
 * @brief Sum the effective DIMM sizes behind a target
 *
 * An MBA target is queried directly.  For an MCBIST target the functional
 * MCS children (by affinity) are queried.  Targets on which the attribute
 * cannot be read contribute nothing.
 *
 * @param[in] i_target MBA or MCBIST target
 *
 * @return sum of all attribute entries, 0 for any other target type
 */
uint64_t getMemSize(TargetHandle_t i_target)
{
    TargetHandleList targetList;

    const TYPE trgtType = i_target->getAttr<ATTR_TYPE>();

    // if target is MBA
    if( TYPE_MBA == trgtType )
    {
        targetList.push_back(i_target);
    }
    // if target is MCBIST we have to get the connected MCSs
    else if( TYPE_MCBIST == trgtType )
    {
        PredicateCTM predType( CLASS_NA, TYPE_MCS );
        PredicateIsFunctional predFunc;
        PredicatePostfixExpr predAnd;
        predAnd.push(&predType).push(&predFunc).And();
        targetService().getAssociated( targetList,
                                       i_target,
                                       TargetService::CHILD_BY_AFFINITY,
                                       TargetService::ALL,
                                       &predAnd );
    }

    uint64_t memsize = 0;
    AttributeTraits<ATTR_EFF_DIMM_SIZE>::Type effDimmSizeAttr;

    for(auto trgt : targetList)
    {
        if(!trgt->tryGetAttr<ATTR_EFF_DIMM_SIZE>(effDimmSizeAttr))
        {
            continue;
        }

        // the attribute is a [port][dimm] array
        for(const auto & port : effDimmSizeAttr)
        {
            for(const auto dimmSize : port)
            {
                memsize += dimmSize;
            }
        }
    }

    return memsize;
}

/**
 * @brief Handle expired maint command timeout monitors
 *
 * For every expired monitor ID the owning workflow is looked up.  If the
 * command-complete attention is pending and unmasked, the timer is re-armed
 * (SW timeout; an informational log is committed once the counter reaches
 * MAINT_CMD_TIMEOUT_LOG).  Otherwise the command is stopped, an
 * unrecoverable log with a callout is committed and the workflow is marked
 * COMMAND_TIMED_OUT.
 *
 * Runs entirely under iv_mutex.
 *
 * @param[in] i_monitorIDs IDs of the monitors that timed out
 */
void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs)
{
    MDIA_FAST("sm: processCommandTimeout");

    errlHndl_t err = nullptr;

    mutex_lock(&iv_mutex);

    for(MonitorIDs::const_iterator monitorIt = i_monitorIDs.begin();
        monitorIt != i_monitorIDs.end();
        ++monitorIt)
    {
        // Scoped per monitor ID so that a workflow timed out for an earlier
        // ID is not scheduled again for a later ID that matches nothing.
        WorkFlowProperties * wkflprop = nullptr;

        for(WorkFlowProperties * wfp : iv_workFlowProperties)
        {
            if(wfp->timer != *monitorIt)
            {
                continue;
            }

            TargetHandle_t target = getTarget(*wfp);
            TYPE trgtType = target->getAttr<ATTR_TYPE>();
            uint64_t firData = 0;
            uint64_t mskData = 0;
            size_t sz_uint64 = sizeof(uint64_t);

            // Init data for MCBIST.
            uint64_t firAddr    = MCBIST_FIR;
            uint64_t firAndAddr = MCBIST_FIR_AND;
            uint64_t mskAddr    = MCBIST_FIR_MASK;
            uint64_t bitMask    = 0x0028000000000000;

            // Change if target type is MBA.
            if ( TYPE_MBA == trgtType )
            {
                firAddr = MBA01_SPA;
                mskAddr = MBA01_SPA_MASK;
                bitMask = 0x8080000000000000; // bits 0 or 8
            }

            // Check for command complete. If set, don't time out.
            err = deviceRead( target, &firData, sz_uint64,
                              DEVICE_SCOM_ADDRESS(firAddr) );
            if ( nullptr != err )
            {
                MDIA_FAST("sm: deviceRead on 0x%08X failed HUID:0x%08X",
                          firAddr, get_huid(target));
                //commit locally and let it timeout
                errlCommit(err, MDIA_COMP_ID);
            }
            else
            {
                firData &= bitMask;
            }

            // TODO RTC 168088
            if ( 0 != firData )
            {
                err = deviceRead( target, &mskData, sz_uint64,
                                  DEVICE_SCOM_ADDRESS(mskAddr) );
                if ( nullptr != err )
                {
                    MDIA_FAST("sm: deviceRead on 0x%08X failed "
                              "HUID:0x%08X", mskAddr, get_huid(target));
                    //commit locally and let it timeout
                    // NOTE(review): if the failed read leaves mskData at 0
                    // the attention is treated as unmasked below and the
                    // timer is re-armed rather than timed out -- confirm
                    // this is intended.
                    errlCommit(err, MDIA_COMP_ID);
                }
            }

            // Pending maint cmd complete, reset timer
            if(firData & ~mskData)
            {
                // Committing an info log to help debug SW timeout
                if(wfp->timeoutCnt >= MAINT_CMD_TIMEOUT_LOG)
                {
                    MDIA_FAST("sm: committing a SW timeout info log "
                              "for HUID:0x%08X", get_huid(target));

                    /*@
                     * @errortype
                     * @reasoncode   MDIA::MAINT_COMMAND_SW_TIMED_OUT
                     * @severity     ERRORLOG::ERRL_SEV_INFORMATIONAL
                     * @moduleid     MDIA::PROCESS_COMMAND_TIMEOUT
                     * @userData1    Associated memory diag work item
                     * @userData2    Target HUID
                     * @devdesc      A maint command SW timed out
                     */
                    err = new ErrlEntry(ERRL_SEV_INFORMATIONAL,
                                        PROCESS_COMMAND_TIMEOUT,
                                        MAINT_COMMAND_SW_TIMED_OUT,
                                        *(wfp->workItem),
                                        get_huid(target));

                    // collect ffdc
                    addTimeoutFFDC(target, err);

                    errlCommit(err, MDIA_COMP_ID);

                    // reset for the next logging
                    wfp->timeoutCnt = 0;
                }
                else
                {
                    // advance timeout counter
                    wfp->timeoutCnt++;
                }

                MDIA_FAST("sm: work item %d reset SW timed out on "
                          "HUID:0x%08X, timeoutCnt: %d",
                          *(wfp->workItem),
                          get_huid(target),
                          wfp->timeoutCnt);

                // register a new timeout monitor
                wfp->timer = getMonitor().addMonitor(MAINT_CMD_TIMEOUT);

                break;
            }

            /*@
             * @errortype
             * @reasoncode   MDIA::MAINT_COMMAND_HW_TIMED_OUT
             * @severity     ERRORLOG::ERRL_SEV_UNRECOVERABLE
             * @moduleid     MDIA::PROCESS_COMMAND_TIMEOUT
             * @userData1    Associated memory diag work item
             * @userData2    Target HUID
             * @devdesc      A maint command HW timed out
             */
            errlHndl_t timeoutErrl = new ErrlEntry(
                                        ERRL_SEV_UNRECOVERABLE,
                                        PROCESS_COMMAND_TIMEOUT,
                                        MAINT_COMMAND_HW_TIMED_OUT,
                                        *(wfp->workItem),
                                        get_huid(target));

            // collect ffdc
            addTimeoutFFDC(target, timeoutErrl);

            timeoutErrl->addHwCallout(target,
                                      HWAS::SRCI_PRIORITY_HIGH,
                                      HWAS::DELAYED_DECONFIG,
                                      HWAS::GARD_NULL);

            // If maint cmd complete bit is not on, time out
            MDIA_FAST("sm: stopping command HUID:0x%08X",
                      get_huid(target));

            //target type is MBA
            if ( TYPE_MBA == trgtType )
            {
                //TODO RTC 155857
                //no longer have the mss_MaintCmd class at the moment
                //will need to update once we have Cumulus support

                //fapi2::ReturnCode fapirc =
                //    static_cast<mss_MaintCmd *>((*wit)->data)->stopCmd();
                //err = fapi2::rcToErrl(fapirc);

                //if( nullptr != err )
                //{
                //    MDIA_ERR("sm: mss_MaintCmd::stopCmd failed");
                //    errlCommit(err, MDIA_COMP_ID);
                //}

                //fapirc =
                //    static_cast<mss_MaintCmd *>((*wit)->data)->cleanupCmd();
                //err = fapi2::rcToErrl(fapirc);

                //if( nullptr != err )
                //{
                //    MDIA_ERR("sm: mss_MaintCmd::cleanupCmd failed");
                //    errlCommit(err, MDIA_COMP_ID);
                //}
            }
            //target type is MCBIST
            else
            {
                fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiMcbist(target);
                FAPI_INVOKE_HWP( err, memdiags::stop, fapiMcbist );

                if ( nullptr != err )
                {
                    MDIA_ERR("sm: memdiags::stop failed");
                    errlCommit(err, MDIA_COMP_ID);
                }

                //memdiags::stop will set the command complete attention so
                //we need to clear those
                uint64_t clearMask = ~bitMask;
                err = deviceWrite( target, &clearMask, sz_uint64,
                                   DEVICE_SCOM_ADDRESS(firAndAddr) );

                if ( nullptr != err )
                {
                    // report the address actually written (FIR AND reg)
                    MDIA_FAST( "sm: deviceWrite on 0x%08X failed, HUID: "
                               "0x%08X", firAndAddr, get_huid(target) );
                    errlCommit(err, MDIA_COMP_ID);
                }
            }

            wfp->data = NULL;
            wfp->status = COMMAND_TIMED_OUT;
            wkflprop = wfp;

            // log a timeout event
            MDIA_ERR("sm: workItem %d HW timeout on HUID:0x%08X",
                     *(wfp->workItem), get_huid(target));
            errlCommit(timeoutErrl, MDIA_COMP_ID);

            break;
        }

        // if this is the very last command(s), schedule must be called
        // so the waiting istep thread is signaled that we are done.
        // If no match is found (wkflprop), all the attentions came
        // in before the timeout(s) could be processed. the prd thread
        // will have already started the next command(s), if any.
        if(wkflprop)
        {
            scheduleWorkItem(*wkflprop);
        }
    }

    mutex_unlock(&iv_mutex);
}

/**
 * @brief Run the given workflows to completion
 *
 * Loads the workflow properties, starts the work items, blocks until the
 * run is done (or shut down) and then commits any error log recorded on a
 * workflow.
 *
 * @param[in] i_list target / workflow associations to run
 *
 * @return always nullptr; workflow errors are committed here, not returned
 */
errlHndl_t StateMachine::run(const WorkFlowAssocMap & i_list)
{
    // load the workflow properties
    setup(i_list);

    // start work items
    start();

    // wait for all work items to finish
    wait();

    // act on workFlow errors
    mutex_lock(&iv_mutex);

    for(WorkFlowProperties * wfp : iv_workFlowProperties)
    {
        if(wfp->log)
        {
            errlCommit(wfp->log, MDIA_COMP_ID);
        }
    }

    mutex_unlock(&iv_mutex);

    return nullptr;
}

/**
 * @brief Create the per-target workflow properties for a new run
 *
 * Properties left over from a previous run are released first.  iv_done is
 * set when there is nothing to run.
 *
 * @param[in] i_list target / workflow associations to run
 */
void StateMachine::setup(const WorkFlowAssocMap & i_list)
{
    // clear out any properties from a previous run
    reset();

    mutex_lock(&iv_mutex);

    for(WorkFlowAssoc it = i_list.begin(); it != i_list.end(); ++it)
    {
        // for each target / workFlow assoc,
        // initialize the workFlow progress indicator
        // to indicate that no work has been done yet
        // for the target
        WorkFlowProperties * p = new WorkFlowProperties();

        p->assoc = it;
        p->workItem = getWorkFlow(it).begin();
        p->status = IN_PROGRESS;
        p->log = 0;
        p->timer = 0;
        p->timeoutCnt = 0;

        // get the memsize
        p->memSize = getMemSize(it->first);

        p->data = NULL;
        p->chipUnit = it->first->getAttr<ATTR_CHIP_UNIT>();

        iv_workFlowProperties.push_back(p);
    }

    // nothing to do means we are already done
    iv_done = iv_workFlowProperties.empty();

    MDIA_FAST("sm: setup complete: target(s): %d, status: %d",
              iv_workFlowProperties.size(), iv_done);

    mutex_unlock(&iv_mutex);
}

/**
 * @brief Block until the run is done or a shutdown is requested
 */
void StateMachine::wait()
{
    mutex_lock(&iv_mutex);

    MDIA_FAST("sm: waiting for completion of %d target(s), status: %d",
              iv_workFlowProperties.size(), iv_done);

    // wait for everything to finish
    while(!iv_done && !iv_shutdown)
    {
        sync_cond_wait(&iv_cond, &iv_mutex);
    }

    mutex_unlock(&iv_mutex);
}

/**
 * @brief Clear the shutdown flag and schedule the first work item of every
 *        workflow
 */
void StateMachine::start()
{
    mutex_lock(&iv_mutex);

    MDIA_FAST("sm: starting up");

    iv_shutdown = false;

    // schedule the first work items for all target / workFlow associations
    for(WorkFlowProperties * wfp : iv_workFlowProperties)
    {
        scheduleWorkItem(*wfp);
    }

    mutex_unlock(&iv_mutex);
}

/**
 * @brief Schedule the current work item of a workflow, or finish the run
 *
 * Does not take iv_mutex itself; every caller in this file holds it.
 *
 * @param[in,out] i_wfp workflow to advance; marked COMPLETE when its work
 *                      item iterator has reached the end of the workflow
 *
 * @return true if a work item was handed to the thread pool, else false
 */
bool StateMachine::scheduleWorkItem(WorkFlowProperties & i_wfp)
{
    // schedule work items for execution in the thread pool

    // see if the workFlow for this target is complete
    // and see if all phases have completed successfully
    if(i_wfp.workItem == getWorkFlow(i_wfp).end())
    {
        i_wfp.status = COMPLETE;
    }

    if(i_wfp.status == IN_PROGRESS)
    {
        // still work left for this target
        // 1 - get the phase for the target,
        // 2 - create the work item
        // 3 - schedule it

        // determine the priority for the work item to be scheduled
        // the priority is the number of iterations
        // through the memory multiplied by the memory size

        // multiply by memory size
        // assume 1 GB DIMMS if figuring out the memory
        // size failed
        uint64_t priority = getRemainingWorkItems(i_wfp)
                            * (i_wfp.memSize ? i_wfp.memSize : 1);

        if(!iv_tp)
        {
            MDIA_FAST("Starting threadPool...");
            iv_tp = new Util::ThreadPool<WorkItem>();
            iv_tp->start();
        }

        TargetHandle_t target = getTarget(i_wfp);

        MDIA_FAST("sm: dispatching work item %d for: 0x%08x, priority: %d, "
                  "unit: %d", *i_wfp.workItem,
                  get_huid(target),
                  priority,
                  i_wfp.chipUnit);

        iv_tp->insert(new WorkItem(*this, &i_wfp, priority, i_wfp.chipUnit));

        return true;
    }

    // the workFlow for this target is done...for better or worse
    // (failed or successful); check to see if all workFlows for all
    // targets are complete
    if(allWorkFlowsComplete())
    {
        // Clear BAD_DQ_BIT_SET bit
        TargetHandle_t top = NULL;
        targetService().getTopLevelTarget(top);

        ATTR_RECONFIGURE_LOOP_type reconfigAttr =
            top->getAttr<ATTR_RECONFIGURE_LOOP>();
        reconfigAttr &= ~RECONFIGURE_LOOP_BAD_DQ_BIT_SET;
        top->setAttr<ATTR_RECONFIGURE_LOOP>(reconfigAttr);

        // all workFlows are finished
        // release the init service dispatcher
        // thread waiting for completion
        MDIA_FAST("sm: all workflows finished");

        iv_done = true;
        sync_cond_broadcast(&iv_cond);
    }

    return false;
}

/**
 * @brief Tell whether the current work item completes via an attention
 *
 * @param[in] i_wfp workflow whose current work item is examined
 *
 * @return false for the work items that have no attention associated with
 *         them (the next work item is scheduled right away), else true
 */
bool StateMachine::workItemIsAsync(WorkFlowProperties & i_wfp)
{
    switch (*i_wfp.workItem)
    {
        case RESTORE_DRAM_REPAIRS:
        case DUMMY_SYNC_PHASE:
        case CLEAR_HW_CHANGED_STATE:
        case ANALYZE_IPL_MNFG_CE_STATS:
            // no attention associated with these so
            // schedule the next work item now
            return false;

        default:
            return true;
    }
}

/**
 * @brief Thread pool entry point: execute the current work item
 *
 * iv_mutex is dropped while the work item itself runs and is re-taken to
 * record the outcome.
 *
 * @param[in,out] i_wfp workflow whose current work item is executed
 *
 * @return true if a follow-on work item was dispatched, else false
 *         (including when shutdown has been requested)
 */
bool StateMachine::executeWorkItem(WorkFlowProperties * i_wfp)
{
    // thread pool work item entry point

    mutex_lock(&iv_mutex);

    // ensure this thread sees the most recent state
    if(iv_shutdown)
    {
        mutex_unlock(&iv_mutex);
        return false;
    }

    bool async = workItemIsAsync(*i_wfp);

    uint64_t workItem = *i_wfp->workItem;

    MDIA_FAST("sm: executing work item %d for: 0x%08x",
              workItem, get_huid(getTarget(*i_wfp)));

    mutex_unlock(&iv_mutex);

    errlHndl_t err = nullptr;
    int32_t rc = 0;

    switch(workItem)
    {
        // do the appropriate thing based on the phase for this target

        case RESTORE_DRAM_REPAIRS:
            //TODO RTC 136126
            //rc = PRDF::restoreDramRepairs(getTarget(*i_wfp));
            break;

        case START_PATTERN_0:
        case START_PATTERN_1:
        case START_PATTERN_2:
        case START_PATTERN_3:
        case START_PATTERN_4:
        case START_PATTERN_5:
        case START_PATTERN_6:
        case START_PATTERN_7:
        case START_RANDOM_PATTERN:
        case START_SCRUB:
            err = doMaintCommand(*i_wfp);
            break;

        case CLEAR_HW_CHANGED_STATE:
            mutex_lock(&iv_mutex);
            clearHWStateChanged(getTarget(*i_wfp));
            mutex_unlock(&iv_mutex);
            break;

        case ANALYZE_IPL_MNFG_CE_STATS:
        {
            MDIA_FAST("Executing analyzeIplCEStats");
            bool calloutMade = false;
            TargetHandle_t mba = getTarget( *i_wfp);
            rc = PRDF::analyzeIplCEStats( mba, calloutMade);

            if( rc)
            {
                MDIA_FAST("executeWorkItem: PRDF::analyzeIplCEStats failed "
                          "rc:%d HUID:0x%08X", rc, get_huid(mba));
            }

            if( calloutMade )
            {
                // There is no reason to update HCDB as we are doing
                // deferred deconfig. HCDB will be updated at end of istep
                // during deferred deconfig only. Just adding information
                // message here.
                MDIA_FAST("PRD performed HW callouts during"
                          "analyzeIplCEStats");
            }
        }
        break;

        default:
            break;
    }

    mutex_lock(&iv_mutex);

    if(err || rc)
    {
        // stop the workFlow for this target
        i_wfp->status = FAILED;
        i_wfp->log = err;
    }
    else if(!async)
    {
        // sync work item -
        // move the workFlow pointer to the next phase
        ++i_wfp->workItem;
    }

    bool dispatched = false;

    if(err || !async)
    {
        // check to see if this was the last workFlow
        // in progress (if there was an error), or for sync
        // work items, schedule the next work item
        dispatched = scheduleWorkItem(*i_wfp);
    }

    mutex_unlock(&iv_mutex);

    return dispatched;
}

/**
 * @brief Start the maint command for the current work item
 *
 * The MBA path is currently disabled (TODO RTC 155857).  For any other
 * target the matching memdiags procedure is invoked on the MCBIST.  A
 * timeout monitor is registered whenever no error was returned.
 *
 * @param[in,out] i_wfp workflow the command belongs to
 *
 * @return nullptr on success, else the error log from the failing procedure
 */
errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp)
{
    errlHndl_t err = nullptr;

    uint64_t workItem;

    TargetHandle_t target;

    // starting a maint cmd ...  register a timeout monitor
    TargetHandle_t sys = nullptr;
    targetService().getTopLevelTarget(sys);

    HbSettings hbSettings = sys->getAttr<ATTR_HB_SETTINGS>();

    // continuous tracing gets the long timeout
    uint64_t maintCmdTO = hbSettings.traceContinuous
                              ? MAINT_CMD_TIMEOUT_LONG
                              : MAINT_CMD_TIMEOUT;

    mutex_lock(&iv_mutex);

    i_wfp.timeoutCnt = 0; // reset for new work item
    workItem = *i_wfp.workItem;

    target = getTarget(i_wfp);

    mutex_unlock(&iv_mutex);

    TYPE trgtType = target->getAttr<ATTR_TYPE>();

    // single-pass loop: the disabled MBA code below leaves via 'break'
    do
    {
        // new command...use the full range

        //target type is MBA
        if (TYPE_MBA == trgtType)
        {
            /*TODO RTC 155857
            uint64_t stopCondition =
                mss_MaintCmd::STOP_END_OF_RANK                  |
                mss_MaintCmd::STOP_ON_MPE                       |
                mss_MaintCmd::STOP_ON_UE                        |
                mss_MaintCmd::STOP_ON_END_ADDRESS               |
                mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION;

            if( TARGETING::MNFG_FLAG_IPL_MEMORY_CE_CHECKING
                & iv_globals.mfgPolicy )
            {
                // For MNFG mode, check CE also
                stopCondition |= mss_MaintCmd::STOP_ON_HARD_NCE_ETE;
            }

            ecmdDataBufferBase startAddr(64), endAddr(64);
            mss_MaintCmd * cmd = NULL;
            cmd = static_cast<mss_MaintCmd *>(i_wfp.data);
            fapi2::Target<fapi2::TARGET_TYPE_MBA_CHIPLET> fapiMba(target);

            // We will always do ce setup though CE calculation
            // is only done during MNFG. This will give use better ffdc.
            err = ceErrorSetup( target );
            if( nullptr != err)
            {
                MDIA_FAST("sm: ceErrorSetup failed for mba. HUID:0x%08X",
                          get_huid(target));
                break;
            }

            fapirc = mss_get_address_range(
                        fapiMba,
                        MSS_ALL_RANKS,
                        startAddr,
                        endAddr);
            err = fapiRcToErrl(fapirc);

            if(err)
            {
                MDIA_FAST("sm: get_address_range failed");
                break;
            }

            // new command...use the full range

            switch(workItem)
            {
                case START_RANDOM_PATTERN:

                    cmd = new mss_SuperFastRandomInit(
                            fapiMba,
                            startAddr,
                            endAddr,
                            mss_MaintCmd::PATTERN_RANDOM,
                            stopCondition,
                            false);

                    MDIA_FAST("sm: random init %p on: %x", cmd,
                              get_huid(target));
                    break;

                case START_SCRUB:

                    cmd = new mss_SuperFastRead(
                            fapiMba,
                            startAddr,
                            endAddr,
                            stopCondition,
                            false);

                    MDIA_FAST("sm: scrub %p on: %x", cmd,
                              get_huid(target));
                    break;

                case START_PATTERN_0:
                case START_PATTERN_1:
                case START_PATTERN_2:
                case START_PATTERN_3:
                case START_PATTERN_4:
                case START_PATTERN_5:
                case START_PATTERN_6:
                case START_PATTERN_7:

                    cmd = new mss_SuperFastInit(
                            fapiMba,
                            startAddr,
                            endAddr,
                            static_cast<mss_MaintCmd::PatternIndex>(workItem),
                            stopCondition,
                            false);

                    MDIA_FAST("sm: init %p on: %x", cmd,
                              get_huid(target));
                    break;

                default:
                    break;
            }

            if(!cmd)
            {
                MDIA_ERR("unrecognized maint command type %d on: %x",
                         workItem, get_huid(target));
                break;
            }

            mutex_lock(&iv_mutex);

            i_wfp.data = cmd;

            mutex_unlock(&iv_mutex);

            // Command and address configured.
            // Invoke the command.
            fapirc = cmd->setupAndExecuteCmd();
            err = fapi2::rcToErrl(fapirc);

            if( nullptr != err )
            {
                MDIA_FAST("sm: setupAndExecuteCmd %p failed", target);
                i_wfp.data = nullptr;
                if (cmd)
                {
                    delete cmd;
                }
            }
            */
        }
        //target type is MCBIST
        else
        {
            fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiMcbist(target);
            mss::mcbist::stop_conditions stopCond;

            switch(workItem)
            {
                case START_RANDOM_PATTERN:

                    FAPI_INVOKE_HWP( err, memdiags::sf_init, fapiMcbist,
                                     mss::mcbist::PATTERN_RANDOM );
                    MDIA_FAST("sm: random init %p on: %x", fapiMcbist,
                              get_huid(target));
                    break;

                case START_SCRUB:

                    //set stop conditions
                    stopCond.set_pause_on_mpe(mss::ON);
                    stopCond.set_pause_on_ue(mss::ON);
                    stopCond.set_nce_inter_symbol_count_enable(mss::ON);
                    stopCond.set_nce_soft_symbol_count_enable( mss::ON);
                    stopCond.set_nce_hard_symbol_count_enable( mss::ON);

                    // For MNFG mode, also pause on hard CEs
                    if (TARGETING::MNFG_FLAG_IPL_MEMORY_CE_CHECKING
                        & iv_globals.mfgPolicy)
                    {
                        stopCond.set_pause_on_nce_hard(mss::ON);
                    }

                    FAPI_INVOKE_HWP( err, memdiags::sf_read, fapiMcbist,
                                     stopCond );
                    MDIA_FAST("sm: scrub %p on: %x", fapiMcbist,
                              get_huid(target));
                    break;

                case START_PATTERN_0:
                case START_PATTERN_1:
                case START_PATTERN_2:
                case START_PATTERN_3:
                case START_PATTERN_4:
                case START_PATTERN_5:
                case START_PATTERN_6:
                case START_PATTERN_7:

                    FAPI_INVOKE_HWP( err, memdiags::sf_init, fapiMcbist,
                                     workItem );
                    MDIA_FAST("sm: init %p on: %x", fapiMcbist,
                              get_huid(target));
                    break;

                default:
                    MDIA_ERR("unrecognized work item type %d on: %x",
                             workItem, get_huid(target));
                    break;
            }

            if( nullptr != err )
            {
                MDIA_FAST("sm: Running Maint Cmd failed");
                i_wfp.data = nullptr;
            }
        }

        if ( nullptr == err )
        {
            // Start a timeout monitor
            mutex_lock(&iv_mutex);

            i_wfp.timer = getMonitor().addMonitor(maintCmdTO);

            mutex_unlock(&iv_mutex);
        }

    } while(0);

    return err;
}

/**
 * @brief Get the command monitor, creating and starting it on first use
 *
 * Does not take iv_mutex itself; every caller in this file holds it.
 *
 * @return reference to the command monitor
 */
CommandMonitor & StateMachine::getMonitor()
{
    if(!iv_monitor)
    {
        MDIA_FAST("Starting monitor...");
        iv_monitor = new CommandMonitor();
        iv_monitor->start(*this);
    }

    return *iv_monitor;
}

/**
 * @brief Process a maint command event for a target
 *
 * Events for unknown targets, and for workflows that already timed out or
 * failed, are ignored.  Otherwise the timeout monitor is removed and the
 * workflow is advanced, stopped or left alone depending on the event type.
 *
 * @param[in] i_event event (target and type) to process
 *
 * @return true if a follow-on work item was dispatched, else false
 */
bool StateMachine::processMaintCommandEvent(const MaintCommandEvent & i_event)
{
    MDIA_FAST("sm: processMaintCommandEvent");

    enum
    {
        CLEANUP_CMD = 0x8,
        DELETE_CMD = 0x4,
        STOP_CMD = 0x2,
        START_NEXT_CMD = 0x1,
        DISPATCHED = 0x80,
    };

    uint64_t flags = 0;
    TargetHandle_t target = NULL;
    errlHndl_t err = NULL;

    mutex_lock(&iv_mutex);

    // find the workflow that owns the event's target
    WorkFlowPropertiesIterator wit = iv_workFlowProperties.begin();

    for(; wit != iv_workFlowProperties.end(); ++wit)
    {
        if(getTarget(**wit) == i_event.target)
        {
            break;
        }
    }

    if(wit == iv_workFlowProperties.end())
    {
        MDIA_ERR("sm: did not find target: %x",
                 get_huid(i_event.target));
    }
    // if a command finishes (just) after the
    // timeout and we haven't had a chance to stop the
    // command yet, it may end up here.  Ignore it
    // and let the timeout thread do its job.
    // Also ignore when it is in failed state.
    else if(((**wit).status != COMMAND_TIMED_OUT) &&
            ((**wit).status != FAILED))
    {
        WorkFlowProperties & wfp = **wit;

        // always unregister any existing maint cmd monitor
        getMonitor().removeMonitor(wfp.timer);

        target = getTarget(**wit);
        TYPE trgtType = target->getAttr<ATTR_TYPE>();

        MDIA_FAST("sm: processing event for: %x, target: %x, type: %x",
                  get_huid(getTarget(wfp)), get_huid(target), i_event.type);

        MaintCommandEventType eventType = i_event.type;

        // If shutdown is requested and we're not in MNFG mode
        // skip testing on all MBAs
        if(( INITSERVICE::isShutdownRequested() ) &&
           ( COMMAND_COMPLETE == eventType ) &&
           ! (( MNFG_FLAG_ENABLE_EXHAUSTIVE_PATTERN_TEST
                & iv_globals.mfgPolicy) ||
              ( MNFG_FLAG_ENABLE_STANDARD_PATTERN_TEST
                & iv_globals.mfgPolicy) ||
              ( MNFG_FLAG_ENABLE_MINIMUM_PATTERN_TEST
                & iv_globals.mfgPolicy)))
        {
            MDIA_FAST("sm: shutdown requested, overrding event "
                      "for: %x, target: %p, type: %x, globals: %x",
                      get_huid(getTarget(wfp)), target, i_event.type,
                      iv_globals.mfgPolicy);
            eventType = STOP_TESTING;
        }

#ifdef CONFIG_BMC_IPMI
        // Reset the watchdog timer after running each pattern
        errlHndl_t err_ipmi = IPMIWATCHDOG::resetWatchDogTimer();

        if(err_ipmi)
        {
            MDIA_FAST("sm executeWorkitem: IPMI reset watchdog failed");
            err_ipmi->collectTrace("MDIA_FAST",1024);
            errlCommit(err_ipmi, MDIA_COMP_ID );
        }
#endif

        switch(eventType)
        {
            case COMMAND_COMPLETE:

                // command stopped or complete at end of last rank

                // move to the next command
                ++wfp.workItem;

                // done with this maint command
                flags = DELETE_CMD | START_NEXT_CMD;
                wfp.data = NULL;

                break;

            case STOP_TESTING:

                // stop testing on this target
                wfp.status = COMPLETE;

                // done with this command
                flags = DELETE_CMD | STOP_CMD | START_NEXT_CMD;
                wfp.data = NULL;

                break;

            case RESET_TIMER:
                flags = CLEANUP_CMD;
                break;

            default:
                // this shouldn't happen, but if it does
                // free up the memory
                flags = DELETE_CMD;
                wfp.data = NULL;
                break;
        }

        //target type is MBA
        if(TYPE_MBA == trgtType)
        {
            //TODO RTC 155857
            //mss_MaintCmd * cmd = static_cast<mss_MaintCmd *>(wfp.data);
            //
            //if(cmd && (flags & STOP_CMD))
            //{
            //    MDIA_FAST("sm: stopping command: %p", target);

            //    fapi2::ReturnCode fapirc = cmd->stopCmd();
            //    err = fapi2::rcToErrl(fapirc);

            //    if (nullptr != err)
            //    {
            //        MDIA_ERR("sm: mss_MaintCmd::stopCmd failed");
            //        errlCommit(err, MDIA_COMP_ID);
            //    }
            //}

            //if(cmd && (flags & CLEANUP_CMD))
            //{
            //    // restore any init settings that
            //    // may have been changed by the command

            //    fapi2::ReturnCode fapirc = cmd->cleanupCmd();
            //    err = fapi2::rcToErrl(fapirc);
            //    if(nullptr != err)
            //    {
            //        MDIA_ERR("sm: mss_MaintCmd::cleanupCmd failed");
            //        errlCommit(err, MDIA_COMP_ID);
            //    }
            //}

            //if(cmd && (flags & DELETE_CMD))
            //{
            //    delete cmd;
            //}
        }
        //target type is MCBIST
        else
        {
            if(flags & STOP_CMD)
            {
                MDIA_FAST("sm: stopping command: %p", target);

                fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiMcbist(target);
                FAPI_INVOKE_HWP( err, memdiags::stop, fapiMcbist );

                if(nullptr != err)
                {
                    MDIA_ERR("sm: memdiags::stop failed");
                    errlCommit(err, MDIA_COMP_ID);
                }
            }
        }

        // schedule the next work item
        if((flags & START_NEXT_CMD) && !iv_shutdown)
        {
            if(scheduleWorkItem(wfp))
            {
                flags |= DISPATCHED;
            }
        }
    }

    mutex_unlock(&iv_mutex);

    return (flags & DISPATCHED) != 0;
}

/**
 * @brief Check whether every workflow has left the IN_PROGRESS state
 *
 * Does not take iv_mutex itself; the caller in this file holds it.
 *
 * @return true if no workflow is IN_PROGRESS (also when there are none)
 */
bool StateMachine::allWorkFlowsComplete()
{
    for(WorkFlowProperties * wfp : iv_workFlowProperties)
    {
        if(wfp->status == IN_PROGRESS)
        {
            return false;
        }
    }

    return true;
}

/**
 * @brief Release all workflow properties, including any error log still
 *        attached to them
 */
void StateMachine::reset()
{
    mutex_lock(&iv_mutex);

    for(WorkFlowProperties * wfp : iv_workFlowProperties)
    {
        // deleting a null log is a no-op
        delete wfp->log;
        delete wfp;
    }

    iv_workFlowProperties.clear();

    mutex_unlock(&iv_mutex);
}

/**
 * @brief Request shutdown and stop the thread pool and the monitor
 *
 * The pool and monitor pointers are detached under iv_mutex; the objects
 * themselves are shut down and deleted after the mutex is released.
 */
void StateMachine::shutdown()
{
    mutex_lock(&iv_mutex);

    Util::ThreadPool<WorkItem> * tp = iv_tp;
    CommandMonitor * monitor = iv_monitor;

    iv_tp = 0;
    iv_monitor = 0;

    iv_shutdown = true;

    mutex_unlock(&iv_mutex);

    MDIA_FAST("sm: shutting down...");

    if(tp)
    {
        MDIA_FAST("Stopping threadPool...");
        tp->shutdown();
        delete tp;
    }

    if(monitor)
    {
        MDIA_FAST("Stopping monitor...");
        monitor->shutdown();
        delete monitor;
    }

    MDIA_FAST("sm: ...shutdown complete");
}

/**
 * @brief Destructor: shuts down, then destroys the condition and the mutex
 */
StateMachine::~StateMachine()
{
    shutdown();

    sync_cond_destroy(&iv_cond);
    mutex_destroy(&iv_mutex);
}

/**
 * @brief Constructor: starts out done, not shut down, with no monitor and
 *        no thread pool
 */
StateMachine::StateMachine() :
    iv_monitor(0),
    iv_done(true),
    iv_shutdown(false),
    iv_tp(0),
    iv_globals()
{
    mutex_init(&iv_mutex);
    sync_cond_init(&iv_cond);
}

/**
 * @brief Replace the globals (e.g. the mfg policy) used by the state machine
 *
 * @param[in] i_globals values to copy in
 */
void StateMachine::setGlobals(Globals & i_globals)
{
    mutex_lock(&iv_mutex);

    iv_globals = i_globals;

    mutex_unlock(&iv_mutex);
}

}