From 86d20c2609a8f0fc8c082c68e0d76d6b05ffba1b Mon Sep 17 00:00:00 2001 From: sachin gupta Date: Tue, 18 Jun 2013 03:24:26 -0500 Subject: PRD: IPL MNFG CE changes for MDIA Change-Id: I4830625fe3dfa0194c520c539a6e765d8331ca7c RTC: 47290 Depends-On: I94bd86b7b902a20e4f945abbac3b5556cd07f1bc Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/5054 Tested-by: Jenkins Server Reviewed-by: Zane Shelley Reviewed-by: A. Patrick Williams III --- src/usr/diag/mdia/mdia.C | 2 +- src/usr/diag/mdia/mdiafwd.H | 6 +- src/usr/diag/mdia/mdiamba.C | 12 +++- src/usr/diag/mdia/mdiasm.C | 104 +++++++++++++++++++++++++++++++++-- src/usr/diag/mdia/mdiasm.H | 1 + src/usr/diag/mdia/test/mdiatestmba.H | 2 +- 6 files changed, 118 insertions(+), 9 deletions(-) (limited to 'src/usr') diff --git a/src/usr/diag/mdia/mdia.C b/src/usr/diag/mdia/mdia.C index cafc276fb..9b646b0cd 100644 --- a/src/usr/diag/mdia/mdia.C +++ b/src/usr/diag/mdia/mdia.C @@ -86,7 +86,7 @@ errlHndl_t runStep(const TargetHandleList & i_targetList) break; } - err = getMbaWorkFlow(mode, list[*tit]); + err = getMbaWorkFlow(mode, list[*tit], globals); if(err) { diff --git a/src/usr/diag/mdia/mdiafwd.H b/src/usr/diag/mdia/mdiafwd.H index 6737cb29b..6548298a8 100644 --- a/src/usr/diag/mdia/mdiafwd.H +++ b/src/usr/diag/mdia/mdiafwd.H @@ -77,6 +77,8 @@ enum // mem chiplet regs MEM_SPA_FIR = 0x03040004, MEM_SPA_FIR_MASK = 0x03040007, + MEM_MBA0_MBSTR = 0x02011655, + MEM_MBA1_MBSTR = 0x02011755, // mba unit regs MBA01_SPA = 0X03010611, @@ -170,13 +172,15 @@ errlHndl_t getMbaDiagnosticMode( * * @param[in] i_mode the diagnostic mode for the target * @param[out] o_wf the workflow for the mba target + * @param[in] i_globals policy flags needed to determine MNFG status * * @retval 0 no error * @retval !0 unexpected error occurred */ errlHndl_t getMbaWorkFlow( DiagMode i_mode, - WorkFlow & o_wf); + WorkFlow & o_wf, + const Globals & i_globals); /** * @brief doStepCleanup shut down threads and pools on step exit diff --git 
a/src/usr/diag/mdia/mdiamba.C b/src/usr/diag/mdia/mdiamba.C index 35abedbf6..40487db13 100644 --- a/src/usr/diag/mdia/mdiamba.C +++ b/src/usr/diag/mdia/mdiamba.C @@ -78,7 +78,10 @@ errlHndl_t getMbaDiagnosticMode( return 0; } -errlHndl_t getMbaWorkFlow(DiagMode i_mode, WorkFlow & o_wf) +errlHndl_t getMbaWorkFlow( + DiagMode i_mode, + WorkFlow & o_wf, + const Globals & i_globals) { // add the correct sequences for the mba based // on the mode @@ -135,9 +138,14 @@ errlHndl_t getMbaWorkFlow(DiagMode i_mode, WorkFlow & o_wf) break; } + if(MNFG_FLAG_BIT_MNFG_IPL_MEMORY_CE_CHECKINGE + & i_globals.mfgPolicy) + { + o_wf.push_back(ANALYZE_IPL_MNFG_CE_STATS); + } + // clear HW changed state attribute o_wf.push_back(CLEAR_HW_CHANGED_STATE); - return 0; } diff --git a/src/usr/diag/mdia/mdiasm.C b/src/usr/diag/mdia/mdiasm.C index ec2892ba8..04debeb3f 100644 --- a/src/usr/diag/mdia/mdiasm.C +++ b/src/usr/diag/mdia/mdiasm.C @@ -152,6 +152,59 @@ void addTimeoutFFDC(TargetHandle_t i_mba, errlHndl_t & io_log) } } } +// Do the setup for CE thresholds +errlHndl_t ceErrorSetup( TargetHandle_t i_mba ) +{ + errlHndl_t err = NULL; + ecmdDataBufferBase buffer(64); + + do + { + // get the parent membuf + TargetHandle_t membuf = const_cast(getParentChip( + i_mba)); + uint64_t addr = ( ( 0 == i_mba->getAttr()) ? 
+ MEM_MBA0_MBSTR : MEM_MBA1_MBSTR ); + + fapi::Target fapiMb(TARGET_TYPE_MEMBUF_CHIP, membuf); + ReturnCode fapirc = fapiGetScom( fapiMb, addr , buffer); + + err = fapiRcToErrl(fapirc); + + if(err) + { + MDIA_FAST("ceErrorSetup: fapiGetScom on 0x%08X failed HUID:0x%08X", + addr, get_huid(membuf)); + break; + } + + // Enable soft, intermittent, hard and Retry CE threshold attention + buffer.setBit(0, 4); + + buffer.clearBit(4,48); + // Set error threshold to 1 + buffer.setBit(15); + buffer.setBit(27); + buffer.setBit(39); + buffer.setBit(51); + + // Enable per-symbol error counters to count soft, intermittent + // and hard CEs + buffer.setBit(55,3); + + fapirc = fapiPutScom( fapiMb, addr , buffer); + err = fapiRcToErrl(fapirc); + + if(err) + { + MDIA_FAST("ceErrorSetup: fapiPutScom on 0x%08X failed HUID:0x%08X", + addr, get_huid(i_mba)); + break; + } + } while(0); + + return err; +} void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs) { @@ -481,6 +534,7 @@ bool StateMachine::workItemIsAsync(WorkFlowProperties & i_wfp) case RESTORE_DRAM_REPAIRS: case DUMMY_SYNC_PHASE: case CLEAR_HW_CHANGED_STATE: + case ANALYZE_IPL_MNFG_CE_STATS: // no attention associated with these so // schedule the next work item now @@ -554,6 +608,25 @@ bool StateMachine::executeWorkItem(WorkFlowProperties * i_wfp) mutex_unlock(&iv_mutex); + case ANALYZE_IPL_MNFG_CE_STATS: + { + MDIA_FAST("Executing analyzeIplCEStats"); + bool calloutMade = false; + TargetHandle_t mba = getTarget( *i_wfp); + rc = PRDF::analyzeIplCEStats( mba, + calloutMade); + if( rc) + { + MDIA_FAST("executeWorkItem: PRDF::analyzeIplCEStats failed " + "rc:%d HUID:0x%08X", rc, get_huid(mba)); + } + if( calloutMade ) + { + // TODO via RTC 38371. 
+ // Update HCDB + } + + } break; default: @@ -622,14 +695,38 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) mutex_unlock(&iv_mutex); - fapi::Target fapiMba(TARGET_TYPE_MBA_CHIPLET, targetMba); - do { + do + { + fapi::Target fapiMba(TARGET_TYPE_MBA_CHIPLET, targetMba); + ReturnCode fapirc; + TargetHandle_t top = NULL; + targetService().getTopLevelTarget(top); + uint64_t mfgPolicy = 0; + if( top ) + { + mfgPolicy = top->getAttr(); + } + // We will always do ce setup though CE calculation + // is only done during MNFG. This will give us better ffdc. + err = ceErrorSetup( targetMba ); + if( NULL != err) + { + MDIA_FAST("sm: ceErrorSetup failed for mba. HUID:0x%08X", + get_huid(targetMba)); + break; + } + + if( TARGETING::MNFG_FLAG_BIT_MNFG_IPL_MEMORY_CE_CHECKINGE & mfgPolicy ) + { + // For MNFG mode, check CE also + stopCondition |= mss_MaintCmd::STOP_ON_HARD_NCE_ETE; + } // setup the address range. // assume the full range for now - ReturnCode fapirc = mss_get_address_range( + fapirc = mss_get_address_range( fapiMba, MSS_ALL_RANKS, startAddr, @@ -742,7 +839,6 @@ errlHndl_t StateMachine::doMaintCommand(WorkFlowProperties & i_wfp) MDIA_FAST("sm: scrub %p on: %x", cmd, get_huid(targetMba)); break; - case START_PATTERN_0: case START_PATTERN_1: case START_PATTERN_2: diff --git a/src/usr/diag/mdia/mdiasm.H b/src/usr/diag/mdia/mdiasm.H index f4c93b699..83d700201 100644 --- a/src/usr/diag/mdia/mdiasm.H +++ b/src/usr/diag/mdia/mdiasm.H @@ -56,6 +56,7 @@ enum WorkFlowPhase RESTORE_DRAM_REPAIRS, START_SCRUB, CLEAR_HW_CHANGED_STATE, + ANALYZE_IPL_MNFG_CE_STATS, }; /** diff --git a/src/usr/diag/mdia/test/mdiatestmba.H b/src/usr/diag/mdia/test/mdiatestmba.H index 3dd2d8bd9..f877072e2 100644 --- a/src/usr/diag/mdia/test/mdiatestmba.H +++ b/src/usr/diag/mdia/test/mdiatestmba.H @@ -112,7 +112,7 @@ class MdiaMbaTest : public CxxTest::TestSuite expected.push_back(START_PATTERN_0); expected.push_back(CLEAR_HW_CHANGED_STATE); - err = getMbaWorkFlow(mode, wf); + 
err = getMbaWorkFlow(mode, wf, globals); if(err) { -- cgit v1.2.1