From b869f42b77f42f47bd6c5e45ca9c61da29b0ccbd Mon Sep 17 00:00:00 2001 From: Zane Shelley Date: Mon, 21 May 2018 21:09:56 -0500 Subject: PRD: ECC analysis and command handling for DRAM sparing Change-Id: I031abe95c8bcbbce26ee5b064aca7ba95f19d924 RTC: 193444 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59164 Reviewed-by: Caleb N. Palmer Tested-by: Jenkins Server Reviewed-by: Matt Derksen Reviewed-by: Benjamin J. Weisenbeck Reviewed-by: Zane C. Shelley Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59514 Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW Tested-by: FSP CI Jenkins --- .../common/plat/pegasus/prdfCenMbaTdCtlr_common.C | 107 ----- .../common/plat/pegasus/prdfCenMbaTdCtlr_common.H | 459 --------------------- .../prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C | 110 ----- src/usr/diag/prdf/plat/mem/prdfMemDsd.H | 120 ++++-- src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C | 174 +++++++- src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C | 191 +++++++-- .../diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.C | 437 -------------------- .../diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.H | 163 -------- 8 files changed, 404 insertions(+), 1357 deletions(-) delete mode 100644 src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C delete mode 100644 src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H delete mode 100644 src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.C delete mode 100644 src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.H diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C deleted file mode 100644 index 04a336e8d..000000000 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C +++ /dev/null @@ -1,107 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* Contributors Listed Below - COPYRIGHT 2013,2018 */ -/* [+] International Business Machines Corp. */ -/* */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/* */ -/* IBM_PROLOG_END_TAG */ - -#include - -// Framework includes -#include - -// Pegasus includes -#include -#include -#include -#include - -using namespace TARGETING; - -namespace PRDF -{ - -using namespace PlatServices; - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlrCommon::handleMCE_DSD2( STEP_CODE_DATA_STRUCT & io_sc ) -{ - #define PRDF_FUNC "[CenMbaTdCtlrCommon::handleMCE_DSD2] " - - int32_t o_rc = SUCCESS; - - do - { - if ( DSD_PHASE_2 != iv_tdState ) - { - PRDF_ERR( PRDF_FUNC "Invalid state machine configuration" ); - o_rc = FAIL; break; - } - - setTdSignature( io_sc, PRDFSIG_DsdBadSpare ); - io_sc.service_data->setServiceCall(); - - // Callout spare DRAM. - MemoryMru memmru ( iv_mbaTrgt, iv_rank, iv_mark.getCM() ); - io_sc.service_data->SetCallout( memmru ); - - // The spare DRAM is bad, so set it in VPD. At this point, the chip mark - // should have already been set in the VPD because it was recently - // verified. - - CenDqBitmap bitmap; - o_rc = getBadDqBitmap( iv_mbaTrgt, iv_rank, bitmap ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed" ); - break; - } - if ( iv_isEccSteer ) - { - bitmap.setEccSpare(); - } - else - { - o_rc = bitmap.setDramSpare( iv_mark.getCM().getPortSlct() ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "setDramSpare() failed" ); - break; - } - } - - o_rc = setBadDqBitmap( iv_mbaTrgt, iv_rank, bitmap ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "setBadDqBitmap() failed" ); - break; - } - - } while(0); - - return o_rc; - - #undef PRDF_FUNC -} - -} // end namespace PRDF - diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H deleted file mode 100644 index 11b810df1..000000000 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H +++ /dev/null @@ -1,459 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* Contributors Listed Below - COPYRIGHT 2013,2016 */ -/* [+] International Business Machines Corp. */ -/* */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/* */ -/* IBM_PROLOG_END_TAG */ - -/** @file prdfCenMbaTdCtlr_common.H - * @brief The common implementation of the MBA TD Controller. - */ - -#ifndef __prdfCenMbaTdCtlr_common_H -#define __prdfCenMbaTdCtlr_common_H - -// Framework includes -#include -#include -#include - -// Pegasus includes -#include -#include -#include -#include - -namespace PRDF -{ - -class ExtensibleChip; - -/** - * @brief A state machine for memory targeted diagnostics. - */ -class CenMbaTdCtlrCommon -{ - public: // constants, enums - - /** - * @brief This enum will be used to indicate type of TD event requested to - * be handled. - * @note The order of the enums values is important. It is used for - * sorting the TdQueue by event type priority. - */ - enum TdType - { - VCM_EVENT = 0, ///< A Verify Chip Mark event. - TPS_EVENT, ///< A Two-Phase Scrub event. - }; - - protected: // constants, enums - - /** - * @brief Lists all possible states of TD controller - * @note These enums are used as array indexes to cv_cmdCompleteFuncs and - * the last entry will be used to get the size of the array. - */ - enum TdState - { - NO_OP = 0, ///< No TD procedures in place. - VCM_PHASE_1, ///< Verify Chip Mark phase 1. - VCM_PHASE_2, ///< Verify Chip Mark phase 2. - DSD_PHASE_1, ///< DRAM Spare Deploy phase 1. - DSD_PHASE_2, ///< DRAM Spare Deploy phase 2. - TPS_PHASE_1, ///< Two-Phase Scrub phase 1. - TPS_PHASE_2, ///< Two-Phase Scrub phase 2. - MAX_TD_STATE ///< The maximum number of TD states. - }; - - enum EccErrorMask - { - NO_ERROR = 0, ///< No ECC errors found - UE = 0x01, ///< UE - MPE = 0x02, ///< Chip mark placed - MCE = 0x04, ///< CE on chip mark - HARD_CTE = 0x08, ///< Hard CE threshold exceeed - SOFT_CTE = 0x10, ///< Soft CE threshold exceeed - INTER_CTE = 0x20, ///< Intermittent CE threshold exceeed - RETRY_CTE = 0x40, ///< Retry CE threshold exceeed - }; - - // Common stop conditions - enum StopConditions - { - COND_TARGETED_CMD = - mss_MaintCmd::STOP_ON_RETRY_CE_ETE | - mss_MaintCmd::STOP_ON_END_ADDRESS | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION, - - COND_BG_SCRUB = - mss_MaintCmd::STOP_ON_HARD_NCE_ETE | - mss_MaintCmd::STOP_ON_INT_NCE_ETE | - mss_MaintCmd::STOP_ON_SOFT_NCE_ETE | - mss_MaintCmd::STOP_ON_RETRY_CE_ETE | - mss_MaintCmd::STOP_ON_MPE | - mss_MaintCmd::STOP_ON_UE | - mss_MaintCmd::STOP_IMMEDIATE | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION, - - COND_FAST_SCRUB = - COND_BG_SCRUB | - mss_MaintCmd::STOP_ON_END_ADDRESS, - }; - - public: // functions - - /** - * @brief Constructor - * - * This constructor will be called in the MBA data bundle code. Therefore, - * no register reads/writes can be done in this constructor. Anything needed - * to initialize the instance variables that requires register reads/writes - * or is non-trivial should be done in initialize(). - * - * @param i_mbaChip An MBA chip. - */ - explicit CenMbaTdCtlrCommon( ExtensibleChip * i_mbaChip ) : - iv_mbaChip(i_mbaChip), iv_membChip(NULL), iv_mbaTrgt(NULL), - iv_mbaPos(MAX_MBA_PER_MEMBUF), iv_x4Dimm(false), iv_initialized(false), - iv_tdState(NO_OP), iv_rank(), iv_mark(), iv_mssCmd(NULL), - iv_isEccSteer(false) - {} - - /** @brief Destructor */ - ~CenMbaTdCtlrCommon() - { - delete iv_mssCmd; iv_mssCmd = NULL; - } - - /** - * @brief Determines and executes the next course of action after a - * maintenance command complete attention. - * @note Initializes the TD controller, if needed. - * @param io_sc The step code data struct. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ) = 0; - - /** - * @brief Adds a TD procedure to the queue. - * - * TD events are only intended to be handled during FSP runtime, however, it - * is possible that a TD events could be triggered in Hostboot after it has - * been flushed from the cache to system memory. All requests to handle TD - * events during Hostboot will be ignored. Any chip marks placed at this - * time will be found when the FSP TD controller is initialized. The error - * log for the trigger will be committed and a trace statement will be made - * indicating which rank and TD procedure was requested. - * - * @param io_sc The step code data struct. - * @param i_rank The rank in which the event occurred. - * @param i_event The event type (see enum TdType). - * @param i_banTps TRUE to ban any future TPS requests for this rank, - * default FALSE. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - * @note If no TD procedures are in progress, it will stop background - * scrub and start the next TD procedure. - */ - virtual int32_t handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc, - const CenRank & i_rank, - const TdType i_event, - bool i_banTps = false ) = 0; - protected: // functions - - /** - * @brief Initializes the TD controller and sets appropriate information - * in the hardware, if needed. - * - * Since the TD controller constructor will only be called in the MBA data - * bundle, register reads/writes can NOT be done in the constructor. - * Instead, anything needed to initialize the instance variables that - * requires register reads/writes or is non-trivial should be done in - * this function. - * - * @note Should be called at the beginning of every public function to - * ensure the TD controller is initialized. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t initialize(); - - /** - * @brief Analyzes a non-TD command complete event. - * - * A maintenance command has completed but no TD are in progress. This - * function will check for any ECC errors, unverified chip marks from a - * reset/reload, etc. and starts any TD procedures, if necessary. - * - * @param io_sc The step code data struct. - * @param i_stopAddr The address in which the command stopped. - * @param i_endAddr The address set in the MBMEA. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) = 0; - - /** - * @brief Analyzes VCM Phase 1 results and moves state machine. - * @param io_sc The step code data struct. - * @param i_stopAddr The address in which the command stopped. - * @param i_endAddr The address set in the MBMEA. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) = 0; - - /** - * @brief Analyzes VCM Phase 2 results and moves state machine. - * @param io_sc The step code data struct. - * @param i_stopAddr The address in which the command stopped. - * @param i_endAddr The address set in the MBMEA. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) = 0; - - /** - * @brief Analyzes DSD Phase 1 results and moves state machine. - * @param io_sc The step code data struct. - * @param i_stopAddr The address in which the command stopped. - * @param i_endAddr The address set in the MBMEA. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) = 0; - - /** - * @brief Analyzes DSD Phase 2 results and moves state machine. - * @param io_sc The step code data struct. - * @param i_stopAddr The address in which the command stopped. - * @param i_endAddr The address set in the MBMEA. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) = 0; - - /** - * @brief Analyzes Tps Phase 1 results and moves state machine. - * @param io_sc The step code data struct. - * @param i_stopAddr The address in which the command stopped. - * @param i_endAddr The address set in the MBMEA. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) = 0; - - /** - * @brief Analyzes Tps Phase 2 results and moves state machine. - * @param io_sc The step code data struct. - * @param i_stopAddr The address in which the command stopped. - * @param i_endAddr The address set in the MBMEA. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) = 0; - - /** - * @brief Starts VCM Phase 1. - * @param io_sc The step code data struct. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t startVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) = 0; - - /** - * @brief Starts VCM Phase 2. - * @param io_sc The step code data struct. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t startVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ) = 0; - - /** - * @brief Starts DSD Phase 1. - * @param io_sc The step code data struct. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t startDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) = 0; - - /** - * @brief Starts DSD Phase 2. - * @param io_sc The step code data struct. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t startDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) = 0; - - /** - * @brief Starts Tps Phase 1. - * @param io_sc The step code data struct. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t startTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ) = 0; - - /** - * @brief Starts Tps Phase 2. - * @param io_sc The step code data struct. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t startTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ) = 0; - - /** - * @return TRUE if currently running a targeted diagnositics procedure, - * FALSE otherwise. - */ - virtual bool isInTdMode(); - - /** - * @brief Calls the cleanupCmd() function of the command that had just - * completed. - * @note This function will clear the maintenance command complete - * attention. So for FSP attentions, the SDC needs to be synched - * before calling this function just in case there is a - * reset/reload. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t cleanupPrevCmd(); - - /** - * @brief Preforms cleanup tasks that need to be done before starting the - * next maintenance command (i.e. clear scrub counter). - * @param i_clearStats TRUE to clear all scrub statistics (default), FALSE - * otherwise. This is useful when we need to resume - * background scrubbing on the next address and we - * don't want to clear all of the scrub statistics. - * @note Will call cleanupPrevCmd() as part of the preparations. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t prepareNextCmd( bool i_clearStats = true ); - - /** - * @brief Clears FIR bits that may have been a side-effect of a chip mark - * placed by hardware. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t chipMarkCleanup(); - - /** - * @brief Checks if ECC errors have occurred during a maintenance command. - * @param o_eccErrorMask Bitwise mask indicating which ECC errors have - * occurred. - * @param io_sc Service data collector. - * @note This function also updates SDC Multi-Signature list for each - * ECC error. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t checkEccErrors( uint16_t & o_eccErrorMask, - STEP_CODE_DATA_STRUCT & io_sc ); - - /** - * @brief Handle MCE event during VCM Phase 2 - * @param io_sc Service data collector. - * @note This will update bad bits information in VPD, set callouts, and - * start the DRAM sparing procedure, if possible. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc ); - - /** - * @brief Handle MCE event during DSD Phase 2 - * @param io_sc Service data collector. - * @note This will update bad bits information in VPD and set callouts. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t handleMCE_DSD2( STEP_CODE_DATA_STRUCT & io_sc ); - - /** - * @brief Will set the threshold for all runtime ETE attentions in - * hardware. - * @note This only sets the runtime thresholds but is a common function - * because these thresholds will need to be set before starting the - * initial fast scrub at the end of Hostboot. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - virtual int32_t setRtEteThresholds(); - - /** - * @brief This class is designed such that all functions will eventually - * return any bad error code to the top level public functions such - * as handleCmdCompleteEvent() and handleTdEvent(). This is a common - * function to handle everything needed to that the TD controller - * can hopefully fail gracefully. - * @param io_sc The step code data struct. - */ - virtual void badPathErrorHandling( STEP_CODE_DATA_STRUCT & io_sc ); - - /** - * @brief Add signature to SDC - * @param io_sc Service data collector. - * @param i_sig Error Signature. - * @note All signatures in TD controller are scoped to the targeted MBA. - * However, it is possible that the attention that triggered this - * TD request came from one of the MBSECCFIRs, which are on the - * MEMBUF. So, change the chip ID in the signature to the targeted - * MBA to avoid an "Undefined error code". - */ - void setTdSignature( STEP_CODE_DATA_STRUCT & io_sc, uint32_t i_sig ); - - protected: // instance variables - - /** The MBA chip that this TD controller acts on. */ - ExtensibleChip * iv_mbaChip; - - /** The MEMBUF chip connected iv_mbaChip. */ - ExtensibleChip * iv_membChip; - - /** The MBA target associated with iv_mbaChip. */ - TARGETING::TargetHandle_t iv_mbaTrgt; - - /** The position number (0-1) relative to the connected MEMBUF. */ - uint32_t iv_mbaPos; - - /** TRUE if DIMM has x4 DRAMs, FALSE if DIMM has x8 DRAMs. */ - bool iv_x4Dimm; - - /** Indicates if TD controller is initialized. */ - bool iv_initialized; - - /** The targeted diagnostics state variable (see enum TdState). */ - TdState iv_tdState; - - /** The current rank that is being targeted for diagnostics. */ - CenRank iv_rank; - - /** The current mark that is being targeted for diagnostics. */ - CenMark iv_mark; - - /** Current maintenance command */ - PlatServices::mss_MaintCmdWrapper * iv_mssCmd; - - /** Tells if in DSD procedure we should use eccSpare. */ - bool iv_isEccSteer; - -}; // CenMbaTdCtlrCommon - -} // end namespace PRDF - -#endif // __prdfCenMbaTdCtlr_common_H - diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C index a2bc9ca83..b8a4d5ef0 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C @@ -76,116 +76,6 @@ CenMbaTdCtlr::FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] = // Private Functions //------------------------------------------------------------------------------ -int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - #define PRDF_FUNC "[CenMbaTdCtlr::analyzeDsdPhase1] " - - int32_t o_rc = SUCCESS; - - do - { - if ( DSD_PHASE_1 != iv_tdState ) - { - PRDF_ERR( PRDF_FUNC "Invalid state machine configuration" ); - o_rc = FAIL; break; - } - - // Add the mark to the callout list. - CalloutUtil::calloutMark( iv_mbaTrgt, iv_rank, iv_mark, io_sc ); - - // Check for any ECC errors that occurred during the procedure. - uint16_t eccErrorMask = NO_ERROR; - o_rc = checkEccErrors( eccErrorMask, io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "checkEccErrors() failed" ); - break; - } - - if ( eccErrorMask & UE ) - { - o_rc = handleUe_Td( io_sc, i_stopAddr ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "handleUe_Td() failed" ); - break; - } - - // Abort the procedure. - iv_tdState = NO_OP; - break; - } - - if ( eccErrorMask & RETRY_CTE ) - { - o_rc = handleRceEte_Td( io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "handleRceEte_Td() failed" ); - break; - } - } - - if ( i_endAddr == i_stopAddr ) - { - // At this point, the procedure has not been aborted due to an error - // like a memory UE so consider the spare successful. - setTdSignature( io_sc, PRDFSIG_DsdDramSpared ); - - // Remove chip mark from hardware. - iv_mark.clearCM(); - - // There is small time window where hardware places a chip mark - // immediately after it is removed, but before the HWP procedure can - // query the FIR registers. In this case, we will simply allow the - // write to be 'blocked' and handle the new chip mark in a separate - // attention. - bool allowWriteBlocked = true; - bool blocked; // Currently ignored. - o_rc = mssSetMarkStore( iv_mbaTrgt, iv_rank, iv_mark, blocked, - allowWriteBlocked ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "mssSetMarkStore() failed" ); - break; - } - - // Always reset the state machine after DSD Phase 1 is complete. - iv_tdState = NO_OP; - } - else - { - // Restart the scrub on the next address. - o_rc = resumeScrub( io_sc, eccErrorMask ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "resumeScrub() failed" ); - break; - } - } - - } while(0); - - // If this TD procedure was completed or aborted, execute TD complete - // sequence. - if ( (iv_tdState == NO_OP) && (SUCCESS == o_rc) ) - { - o_rc = handleTdComplete( io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "handleTdComplete() failed" ); - } - } - - return o_rc; - - #undef PRDF_FUNC -} - -//------------------------------------------------------------------------------ - int32_t CenMbaTdCtlr::analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc, const CenAddr & i_stopAddr, const CenAddr & i_endAddr ) diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDsd.H b/src/usr/diag/prdf/plat/mem/prdfMemDsd.H index c53af33bc..ac1b1e044 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemDsd.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemDsd.H @@ -70,6 +70,8 @@ class DsdEvent : public TdEntry uint32_t o_rc = SUCCESS; + o_done = false; + do { // First, do analysis. @@ -112,59 +114,121 @@ class DsdEvent : public TdEntry private: // functions /** - * @brief Do analysis based on the current phase. - * @param io_sc The step code data struct. - * @param o_done True if the procedure is complete or has aborted, false - * otherwise. + * @brief Does isolation for ECC attentions during each phase. + * @param i_eccAttns Mask of all currently active maintenance attentions. + * See enum MaintEccAttns for values. + * @param io_sc The step code data struct. + * @param o_done True if the procedure is complete or has aborted. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ - uint32_t analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, bool & o_done ); + uint32_t checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, bool & o_done ); /** - * @brief Starts the appropriate maintenance command for each phase of the - * procedure. - * @pre iv_phase must be set appropriately before calling this function. + * @brief Called in the last phase of the procedure to determine if the + * spare was successfully applied. + * @param i_eccAttns Mask of all currently active maintenance attentions. + * See enum MaintEccAttns for values. + * @param io_sc The step code data struct. + * @param o_done True if the procedure is complete or has aborted. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ - uint32_t startCmd(); + uint32_t verifySpare( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, bool & o_done ); /** - * @brief Starts the next phase of the procedure. + * @brief Do analysis based on the current phase. * @param io_sc The step code data struct. - * @post iv_phase will be updated appropriately per design. + * @param o_done True if the procedure is complete or has aborted. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ - uint32_t startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) + uint32_t analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, bool & o_done ) { - uint32_t signature = 0; + #define PRDF_FUNC "[DsdEvent::analyzePhase] " + + uint32_t o_rc = SUCCESS; - switch ( iv_phase ) + do { - case TD_PHASE_0: - iv_phase = TD_PHASE_1; - signature = PRDFSIG_StartDsdPhase1; + if ( TD_PHASE_0 == iv_phase ) + { + // Before starting the first command, set iv_mark in the + // hardware steer mux. + /* TODO: RTC 189221 + o_rc = mssSetSteerMux( iv_chip, iv_rank, iv_mark, + iv_eccSpare ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "mssSetSteerMux(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + break; + } + */ + + break; // Nothing to analyze yet. + } + + // Look for any ECC errors that occurred during the command. + uint32_t eccAttns; + o_rc = checkEccFirs( iv_chip, eccAttns ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "checkEccFirs(0x%08x) failed", + iv_chip->getHuid() ); break; + } + + // Analyze the ECC errors, if needed. + o_rc = checkEcc( eccAttns, io_sc, o_done ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "checkEcc() failed on 0x%08x", + iv_chip->getHuid() ); + break; + } - #ifndef __HOSTBOOT_RUNTIME // IPL only + if ( o_done ) break; // abort the procedure. - case TD_PHASE_1: - iv_phase = TD_PHASE_2; - signature = PRDFSIG_StartDsdPhase2; + // Determine if the spare was applied successfully. + o_rc = verifySpare( eccAttns, io_sc, o_done ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "verifySpare() failed on 0x%08x", + iv_chip->getHuid() ); break; + } - #endif + } while (0); - default: PRDF_ASSERT( false ); // invalid phase + #ifdef __HOSTBOOT_RUNTIME + if ( (SUCCESS == o_rc) && o_done ) + { + // Clear the ECC FFDC for this master rank. + MemDbUtils::resetEccFfdc( iv_chip, iv_rank, MASTER_RANK ); } + #endif - PRDF_TRAC( "[DsdEvent] Starting DSD Phase %d: 0x%08x,0x%02x", - iv_phase, iv_chip->getHuid(), getKey() ); - - io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature ); + return o_rc; - return startCmd(); + #undef PRDF_FUNC } + /** + * @brief Starts the appropriate maintenance command for each phase of the + * procedure. + * @pre iv_phase must be set appropriately before calling this function. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + uint32_t startCmd(); + + /** + * @brief Starts the next phase of the procedure. + * @param io_sc The step code data struct. + * @post iv_phase will be updated appropriately per design. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + uint32_t startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ); + private: // instance variables const MemMark iv_mark; ///< The chip mark from hardware. diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C index 63fea6ea8..a7de2d38c 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemDsd_ipl.C @@ -26,6 +26,7 @@ /** @file prdfMemDsd_ipl.C */ // Platform includes +#include #include #include @@ -38,54 +39,155 @@ using namespace PlatServices; //############################################################################## // -// Generic template functions +// Specializations for MBA // //############################################################################## -template -uint32_t DsdEvent::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<> +uint32_t DsdEvent::checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[DsdEvent::analyzePhase] " + #define PRDF_FUNC "[DsdEvent::checkEcc] " uint32_t o_rc = SUCCESS; do { - if ( TD_PHASE_0 == iv_phase ) + // IUEs are reported as UEs during read operations (via RCE ETE on + // Centaur). Therefore, we will treat IUEs like UEs for these scrub + // operations simply to maintain consistency during all of Memory + // Diagnostics. + if ( (i_eccAttns & MAINT_UE) || (i_eccAttns & MAINT_RCE_ETE) ) { - // Before starting the next command, set iv_mark in the steer mux. - /* TODO: RTC 189221 - o_rc = setSteerMux( iv_chip, iv_rank, iv_mark ); + PRDF_TRAC( "[DsdEvent] UE Detected: 0x%08x,0x%02x", + iv_chip->getHuid(), getKey() ); + + io_sc.service_data->setSignature( iv_chip->getHuid(), + (i_eccAttns & MAINT_UE) + ? PRDFSIG_MaintUE + : PRDFSIG_MaintIUE ); + + // At this point we don't actually have an address for the UE. The + // best we can do is get the address in which the command stopped. + MemAddr addr; + o_rc = getMemMaintAddr( iv_chip, addr ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "setSteerMux(0x%08x,0x%2x) failed", + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + iv_chip->getHuid() ); + break; + } + + o_rc = MemEcc::handleMemUe( iv_chip, addr, + UE_TABLE::SCRUB_UE, io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed", iv_chip->getHuid(), getKey() ); break; } + + // Leave the mark in place and abort this procedure. + o_done = true; break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t DsdEvent::verifySpare( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) +{ + #define PRDF_FUNC "[DsdEvent::verifySpare] " + + uint32_t o_rc = SUCCESS; + + do + { + if ( TD_PHASE_2 != iv_phase ) break; // nothing to do + + if ( i_eccAttns & MAINT_MCE ) + { + PRDF_TRAC( "[DsdEvent] DRAM spare is bad: 0x%08x,0x%02x", + iv_chip->getHuid(), getKey() ); + + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_DsdBadSpare ); + + // Make the error log predictive. + io_sc.service_data->setServiceCall(); + + // Set the bad spare in the VPD. At this point, the chip mark + // should have already been set in the VPD because it was recently + // verified. + + /* TODO: RTC 189221 + CenDqBitmap bitmap; + o_rc = getBadDqBitmap( iv_mbaTrgt, iv_rank, bitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed" ); + break; + } + if ( iv_eccSpare ) + { + bitmap.setEccSpare(); + } + else + { + o_rc = bitmap.setDramSpare( iv_mark.getCM().getPortSlct() ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "setDramSpare() failed" ); + break; + } + } + + o_rc = setBadDqBitmap( iv_mbaTrgt, iv_rank, bitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "setBadDqBitmap() failed" ); + break; + } */ + } + else + { + PRDF_TRAC( "[DsdEvent] DRAM spare applied successfully: " + "0x%08x,0x%02x", iv_chip->getHuid(), getKey() ); - break; // Nothing to analyze yet. + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_DsdDramSpared ); + + // Remove the chip mark. + o_rc = MarkStore::clearChipMark( iv_chip, iv_rank ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "clearChipMark(0x%08x,0x%02x) failed", + iv_chip->getHuid(), getKey() ); + break; + } } - // TODO: RTC 189221 finish supporting this function. + // At this point the procedure is complete. + o_done = true; } while (0); - // TODO: RTC 189221 remove once function is supported - PRDF_ERR( PRDF_FUNC "not supported yet" ); - o_done = true; // to ensure nothing else gets executed - return o_rc; #undef PRDF_FUNC } -//############################################################################## -// -// Specializations for MBA -// -//############################################################################## +//------------------------------------------------------------------------------ template<> uint32_t DsdEvent::startCmd() @@ -130,5 +232,35 @@ uint32_t DsdEvent::startCmd() //------------------------------------------------------------------------------ +template<> +uint32_t DsdEvent::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) +{ + uint32_t signature = 0; + + switch ( iv_phase ) + { + case TD_PHASE_0: + iv_phase = TD_PHASE_1; + signature = PRDFSIG_StartDsdPhase1; + break; + + case TD_PHASE_1: + iv_phase = TD_PHASE_2; + signature = PRDFSIG_StartDsdPhase2; + break; + + default: PRDF_ASSERT( false ); // invalid phase + } + + PRDF_TRAC( "[DsdEvent] Starting DSD Phase %d: 0x%08x,0x%02x", + iv_phase, iv_chip->getHuid(), getKey() ); + + io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature ); + + return startCmd(); +} + +//------------------------------------------------------------------------------ + } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C index 031de5507..f8e8687b0 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemDsd_rt.C @@ -26,6 +26,7 @@ /** @file prdfMemDsd_rt.C */ // Platform includes +#include #include using namespace TARGETING; @@ -37,62 +38,139 @@ using namespace PlatServices; //############################################################################## // -// Generic template functions +// Specializations for MBA // //############################################################################## -template -uint32_t DsdEvent::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc, - bool & o_done ) +template<> +uint32_t DsdEvent::checkEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) { - #define PRDF_FUNC "[DsdEvent::analyzePhase] " + #define PRDF_FUNC "[DsdEvent::checkEcc] " uint32_t o_rc = SUCCESS; - // TODO: RTC 189221 remove once function is supported - PRDF_ERR( PRDF_FUNC "not supported yet" ); - do { - if ( TD_PHASE_0 == iv_phase ) + if ( i_eccAttns & MAINT_UE ) { - // Before starting the next command, set iv_mark in the steer mux. - /* TODO: RTC 189221 - o_rc = setSteerMux( iv_chip, iv_rank, iv_mark ); + PRDF_TRAC( "[DsdEvent] UE Detected: 0x%08x,0x%02x", + iv_chip->getHuid(), getKey() ); + + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_MaintUE ); + + // At this point we don't actually have an address for the UE. The + // best we can do is get the address in which the command stopped. + MemAddr addr; + o_rc = getMemMaintAddr( iv_chip, addr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + iv_chip->getHuid() ); + break; + } + + o_rc = MemEcc::handleMemUe( iv_chip, addr, + UE_TABLE::SCRUB_UE, io_sc ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "setSteerMux(0x%08x,0x%2x) failed", + PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed", iv_chip->getHuid(), getKey() ); break; } - */ - break; // Nothing to analyze yet. + // Leave the mark in place and abort this procedure. + o_done = true; break; } - // TODO: RTC 189221 finish supporting this function. + if ( i_eccAttns & MAINT_RCE_ETE ) + { + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_MaintRETRY_CTE ); + + // Add the rank to the callout list. + MemoryMru mm { iv_chip->getTrgt(), iv_rank, + MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm ); - // At this point, we are done with the procedure. - o_done = true; + // Make the error log predictive. + io_sc.service_data->setServiceCall(); + + // Don't abort continue the procedure. + } } while (0); - if ( (SUCCESS == o_rc) && o_done ) + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t DsdEvent::verifySpare( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) +{ + #define PRDF_FUNC "[DsdEvent::verifySpare] " + + uint32_t o_rc = SUCCESS; + + do { - // Clear the ECC FFDC for this master rank. - MemDbUtils::resetEccFfdc( iv_chip, iv_rank, MASTER_RANK ); - } + if ( TD_PHASE_1 != iv_phase ) break; // nothing to do + + // Because of the Centaur workarounds, we will only do one phase for + // DRAM sparing. In that case, we will not look for an MCE because it is + // very likely for those to occur on phase 1. Instead, we will assume + // the spare is good if the command reached the end of the rank without + // error (i.e. a UE). + + bool lastAddr = false; + o_rc = didCmdStopOnLastAddr( iv_chip, MASTER_RANK, lastAddr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "didCmdStopOnLastAddr(0x%08x) failed", + iv_chip->getHuid() ); + break; + } + + // It is important to initialize iv_canResumeScrub here, so that we will + // know to resume the current phase in startNextPhase() instead of + // starting the next phase. + iv_canResumeScrub = !lastAddr; + + if ( lastAddr ) + { + PRDF_TRAC( "[DsdEvent] DRAM spare applied successfully: " + "0x%08x,0x%02x", iv_chip->getHuid(), getKey() ); + + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_DsdDramSpared ); + // Remove the chip mark. + o_rc = MarkStore::clearChipMark( iv_chip, iv_rank ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "clearChipMark(0x%08x,0x%02x) failed", + iv_chip->getHuid(), getKey() ); + break; + } + + // At this point the procedure is complete. + o_done = true; + } + + } while (0); return o_rc; #undef PRDF_FUNC } -//############################################################################## -// -// Specializations for MBA -// -//############################################################################## +//------------------------------------------------------------------------------ template<> uint32_t DsdEvent::startCmd() @@ -112,12 +190,26 @@ uint32_t DsdEvent::startCmd() stopCond |= mss_MaintCmd::STOP_ON_UE; stopCond |= mss_MaintCmd::STOP_IMMEDIATE; - // Start the time based scrub procedure on this master rank. - o_rc = startTdScrub( iv_chip, iv_rank, MASTER_RANK, stopCond ); - if ( SUCCESS != o_rc ) + if ( iv_canResumeScrub ) { - PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", - iv_chip->getHuid(), getKey() ); + // Resume the command from the next address to the end of this master + // rank. + o_rc = resumeTdScrub( iv_chip, MASTER_RANK, stopCond ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "resumeTdScrub(0x%08x) failed", + iv_chip->getHuid() ); + } + } + else + { + // Start the time based scrub procedure on this master rank. + o_rc = startTdScrub( iv_chip, iv_rank, MASTER_RANK, stopCond); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed", + iv_chip->getHuid(), getKey() ); + } } return o_rc; @@ -127,5 +219,40 @@ uint32_t DsdEvent::startCmd() //------------------------------------------------------------------------------ +template<> +uint32_t DsdEvent::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc ) +{ + uint32_t signature = 0; + + if ( iv_canResumeScrub ) + { + signature = PRDFSIG_DsdResume; + + PRDF_TRAC( "[DsdEvent] Resuming DSD Phase %d: 0x%08x,0x%02x", + iv_phase, iv_chip->getHuid(), getKey() ); + } + else + { + switch ( iv_phase ) + { + case TD_PHASE_0: + iv_phase = TD_PHASE_1; + signature = PRDFSIG_StartVcmPhase1; + break; + + default: PRDF_ASSERT( false ); // invalid phase + } + + PRDF_TRAC( "[DsdEvent] Starting DSD Phase %d: 0x%08x,0x%02x", + iv_phase, iv_chip->getHuid(), getKey() ); + } + + io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature ); + + return startCmd(); +} + +//------------------------------------------------------------------------------ + } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.C deleted file mode 100644 index 414717a1b..000000000 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.C +++ /dev/null @@ -1,437 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.C $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* Contributors Listed Below - COPYRIGHT 2014,2018 */ -/* [+] International Business Machines Corp. */ -/* */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/* */ -/* IBM_PROLOG_END_TAG */ - -// The following is required because PRD implements its own version of these -// hardware procedures: -// $Id: mss_scrub.H,v 1.4 2013/12/02 15:00:03 bellows Exp $ -// $Id: mss_scrub.C,v 1.10 2014/03/11 19:06:19 gollub Exp $ - -#include - -// Framework includes -#include -#include -#include -#include -#include -#include - -// Pegasus includes -#include -#include -#include -#include -#include -#include - -// Custom compile configs -#include - -using namespace TARGETING; - -namespace PRDF -{ - -using namespace PlatServices; - -//------------------------------------------------------------------------------ -// Class Variables -//------------------------------------------------------------------------------ - -CenMbaTdCtlr::FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] = -{ - &CenMbaTdCtlr::analyzeCmdComplete, // NO_OP - &CenMbaTdCtlr::analyzeVcmPhase1, // VCM_PHASE_1 - &CenMbaTdCtlr::analyzeVcmPhase2, // VCM_PHASE_2 - &CenMbaTdCtlr::analyzeDsdPhase1, // DSD_PHASE_1 - &CenMbaTdCtlr::analyzeDsdPhase2, // DSD_PHASE_2 - &CenMbaTdCtlr::analyzeTpsPhase1, // TPS_PHASE_1 - &CenMbaTdCtlr::analyzeTpsPhase2, // TPS_PHASE_2 -}; - -//------------------------------------------------------------------------------ -// Public Functions -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to MemTdCtlr class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc, - const CenRank & i_rank, - const CenMbaTdCtlrCommon::TdType i_event, - bool i_banTps ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to MemTdCtlr class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::startInitialBgScrub() -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to startBgScrub() in prdfPlatServices.C - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ -// Private Functions -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::initialize() -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to MemTdCtlr class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to MemTdCtlr class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // moved to VcmEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // moved to VcmEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - #define PRDF_FUNC "[CenMbaTdCtlr::analyzeDsdPhase1] " - - int32_t o_rc = SUCCESS; - - do - { - if ( DSD_PHASE_1 != iv_tdState ) - { - PRDF_ERR( PRDF_FUNC "Invalid state machine configuration" ); - o_rc = FAIL; break; - } - - // Add the mark to the callout list. - CalloutUtil::calloutMark( iv_mbaTrgt, iv_rank, iv_mark, io_sc ); - - // Get error condition which caused command to stop - uint16_t eccErrorMask = NO_ERROR; - o_rc = checkEccErrors( eccErrorMask, io_sc ); - if ( SUCCESS != o_rc) - { - PRDF_ERR( PRDF_FUNC "checkEccErrors() failed" ); - break; - } - - if ( ( eccErrorMask & UE) || ( eccErrorMask & RETRY_CTE ) ) - { - // Handle UE. Highest priority - o_rc = handleUE( io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "handleUE() failed" ); - break; - } - } - else - { - // Start DSD Phase 2 - o_rc = startDsdPhase2( io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "startDsdPhase2() failed" ); - break; - } - } - - } while(0); - - return o_rc; - - #undef PRDF_FUNC -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - #define PRDF_FUNC "[CenMbaTdCtlr::analyzeDsdPhase2] " - - int32_t o_rc = SUCCESS; - - do - { - if ( DSD_PHASE_2 != iv_tdState ) - { - PRDF_ERR( PRDF_FUNC "Invalid state machine configuration" ); - o_rc = FAIL; break; - } - - // Add the mark to the callout list. - CalloutUtil::calloutMark( iv_mbaTrgt, iv_rank, iv_mark, io_sc ); - - // Get error condition which caused command to stop - uint16_t eccErrorMask = NO_ERROR; - o_rc = checkEccErrors( eccErrorMask, io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "checkEccErrors() failed" ); - break; - } - - if ( eccErrorMask & UE) - { - // Handle UE. Highest priority - o_rc = handleUE( io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "handleUE() failed" ); - break; - } - } - else if ( eccErrorMask & MCE ) - { - // The spare is bad. - - // Do callouts and VPD updates. - o_rc = handleMCE_DSD2( io_sc ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "handleMCE_DSD2() failed" ); - break; - } - } - else - { - // The chip mark has successfully been steered to the spare. - - setTdSignature( io_sc, PRDFSIG_DsdDramSpared ); - - // Remove chip mark from hardware. - iv_mark.clearCM(); - bool blocked; // not possible during MDIA - o_rc = mssSetMarkStore( iv_mbaTrgt, iv_rank, iv_mark, blocked ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "mssSetMarkStore() failed" ); - break; - } - } - - iv_tdState = NO_OP; // The TD procedure is complete. - - } while(0); - - return o_rc; - - #undef PRDF_FUNC -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to TpsEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to TpsEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::startVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // moved to VcmEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::startVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // moved to VcmEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::startDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to DsdEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::startDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to DsdEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::startTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to TpsEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::startTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to TpsEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // moved to MemEcc::handleMemUe() - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::handleMPE( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // moved to MemEcc::handleMpe() - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::handleMnfgCeEte( STEP_CODE_DATA_STRUCT & io_sc ) -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to TpsEvent class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::signalMdiaCmdComplete() -{ - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - // Moved to MemTdCtlr class - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - return SUCCESS; -} - -} // end namespace PRDF - diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.H deleted file mode 100644 index 4e49b2024..000000000 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.H +++ /dev/null @@ -1,163 +0,0 @@ -/* IBM_PROLOG_BEGIN_TAG */ -/* This is an automatically generated prolog. */ -/* */ -/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr_ipl.H $ */ -/* */ -/* OpenPOWER HostBoot Project */ -/* */ -/* Contributors Listed Below - COPYRIGHT 2014,2018 */ -/* [+] International Business Machines Corp. */ -/* */ -/* */ -/* Licensed under the Apache License, Version 2.0 (the "License"); */ -/* you may not use this file except in compliance with the License. */ -/* You may obtain a copy of the License at */ -/* */ -/* http://www.apache.org/licenses/LICENSE-2.0 */ -/* */ -/* Unless required by applicable law or agreed to in writing, software */ -/* distributed under the License is distributed on an "AS IS" BASIS, */ -/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ -/* implied. See the License for the specific language governing */ -/* permissions and limitations under the License. */ -/* */ -/* IBM_PROLOG_END_TAG */ - -/** @file prdfCenMbaTdCtlr_ipl.H - * @brief The Hostboot implementation of the MBA TD Controller. - */ - -#ifndef __prdfCenMbaTdCtlr_ipl_H -#define __prdfCenMbaTdCtlr_ipl_H - -#include - -namespace PRDF -{ - -/** - * @brief A state machine for memory targeted diagnostics during Hostboot MDIA. - */ -class CenMbaTdCtlr : public CenMbaTdCtlrCommon -{ - private: // constants, enums - - // Function pointers for maintenance command complete events. - typedef int32_t (CenMbaTdCtlr::*FUNCS)( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - - public: // functions - - /** - * @brief Constructor - * - * This contructor will be called in the MBA data bundle code. Therefore, - * no register reads/writes can be done in this constructor. Anything needed - * to initialize the instance variables that requires register reads/writes - * or is non-trivial should be done in initialize(). - * - * @param i_mbaChip An MBA chip. - */ - explicit CenMbaTdCtlr( ExtensibleChip * i_mbaChip ) : - CenMbaTdCtlrCommon(i_mbaChip) - {} - - public: // Overloaded functions - - int32_t handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ); - int32_t handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc, - const CenRank & i_rank, const TdType i_event, - bool i_banTps = false ); - - private: // Overloaded functions - - int32_t initialize(); - - int32_t analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - int32_t analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - int32_t analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - int32_t analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - int32_t analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - int32_t analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - int32_t analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc, - const CenAddr & i_stopAddr, - const CenAddr & i_endAddr ); - - int32_t startVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ); - int32_t startVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ); - int32_t startDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ); - int32_t startDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ); - int32_t startTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ); - int32_t startTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ); - - public: // functions - - /** - * @brief Start the initial background scrub. This is done at the very end - * of Hostboot after the last call to checkForIplAttns(). - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - int32_t startInitialBgScrub(); - - private: // functions - - /** - * @brief Handle UEs during TD analysis. - * @param io_sc Service data collector. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - int32_t handleUE( STEP_CODE_DATA_STRUCT & io_sc ); - - /** - * @brief Handle MPE event - * @param io_sc Service data collector. - * @note This will start VCM phase 1. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - int32_t handleMPE( STEP_CODE_DATA_STRUCT & io_sc ); - - /** - * @brief Handle CE ETE during MNFG CE analysis. - * @param io_sc Service data collector. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - int32_t handleMnfgCeEte( STEP_CODE_DATA_STRUCT & io_sc ); - - /** - * @brief Sends a message to MDIA that a maintenance command has completed. - * @note If for some reason PRD needed to do some targeted diagnotics and - * on a rank that was not the last rank behind the MBA, this - * function will need to send a message to MDIA indicating that the - * command stopped and MDIA will need to restart the pattern testing - * from the next address to the end of memory. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - int32_t signalMdiaCmdComplete(); - - private: // instance variables - - /** Array of functions pointers for TD controller states. This is used to - * determine the next course of action after a maintenance command complete - * attention. - */ - static FUNCS cv_cmdCompleteFuncs[MAX_TD_STATE]; - -}; // CenMbaTdCtlr - -} // end namespace PRDF - -#endif // __prdfCenMbaTdCtlr_ipl_H - -- cgit v1.2.1