diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2018-02-21 21:10:58 -0600 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-02-23 11:14:41 -0500 |
commit | 5324435b6d271ec3f4f387109f13d84e1e0c2c2b (patch) | |
tree | a327e02f4b72a290abc83f711efca2444ae826b1 /src/usr/diag/prdf/plat/mem | |
parent | a681d519d4dc3d01b14df22ffa79c4e6bcfdec96 (diff) | |
download | talos-hostboot-5324435b6d271ec3f4f387109f13d84e1e0c2c2b.tar.gz talos-hostboot-5324435b6d271ec3f4f387109f13d84e1e0c2c2b.zip |
PRD: initializing MemTdCtlr variables for broadcast mode
Change-Id: I4982ef4c27f694e35471cad3e3ec57eedfa2632e
CQ: SW418155
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/54548
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf/plat/mem')
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C | 58 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H | 92 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C | 30 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C | 32 |
4 files changed, 101 insertions, 111 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C index c78e71571..ba1eadd83 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C @@ -86,8 +86,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) do { - #ifdef __HOSTBOOT_RUNTIME - // Make sure the TD controller is initialized. o_rc = initialize(); if ( SUCCESS != o_rc ) @@ -96,7 +94,7 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) break; } - #else // IPL only + #ifndef __HOSTBOOT_RUNTIME // IPL only // TODO: RTC 179251 asserting here doesn't give us enough FFDC to debug // why we got this erroneous attention. Eventually, we will want @@ -142,36 +140,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) // informational error logs. if ( !errorsFound ) io_sc.service_data->setDontCommitErrl(); } - else - { - // Make sure iv_stoppedRank still gets updated. - std::vector<ExtensibleChip *> portList; - o_rc = getMcbistMaintPort( iv_chip, portList ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "getMcbistMaintPort(0x%08x) failed", - iv_chip->getHuid() ); - break; - } - - // In broadcast mode, the rank configuration for all ports will be - // the same. In non-broadcast mode, there will only be one MCA in - // the list. Therefore, we can simply use the first MCA in the list - // for all configs. - ExtensibleChip * stopChip = portList.front(); - - // Get the address in which the command stopped. - MemAddr addr; - o_rc = getMemMaintAddr<T>( iv_chip, addr ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "getMemMaintAddr<T>(0x%08x) failed", - iv_chip->getHuid() ); - break; - } - - iv_stoppedRank = __getStopRank<TYPE_MCA>( stopChip, addr ); - } // Move onto the next step in the state machine. 
o_rc = nextStep( io_sc ); @@ -188,7 +156,10 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) // successfully with no errors because the error log will not be // committed. if ( !io_sc.service_data->queryDontCommitErrl() ) + { + collectStateCaptureData( io_sc, TD_CTLR_DATA::END ); MemCaptureData::addEccData<T>( iv_chip, io_sc ); + } if ( SUCCESS != o_rc ) { @@ -219,10 +190,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) #endif } - else - { - collectStateCaptureData( io_sc, TD_CTLR_DATA::END ); - } return o_rc; @@ -249,9 +216,6 @@ template<TARGETING::TYPE T> uint32_t __analyzeCmdComplete( ExtensibleChip * i_chip, TdQueue & io_queue, TdRankListEntry & o_stoppedRank, - #ifndef __HOSTBOOT_RUNTIME - bool & o_broadcastMode, - #endif const MemAddr & i_addr, bool & o_errorsFound, STEP_CODE_DATA_STRUCT & io_sc ); @@ -260,9 +224,6 @@ template<> uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip, TdQueue & io_queue, TdRankListEntry & o_stoppedRank, - #ifndef __HOSTBOOT_RUNTIME - bool & o_broadcastMode, - #endif const MemAddr & i_addr, bool & o_errorsFound, STEP_CODE_DATA_STRUCT & io_sc ) @@ -294,11 +255,6 @@ uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip, // Update iv_stoppedRank. o_stoppedRank = __getStopRank<TYPE_MCA>( stopChip, i_addr ); - #ifndef __HOSTBOOT_RUNTIME - // Update iv_broadcastMode. - o_broadcastMode = ( 1 < portList.size() ); - #endif - // Check each MCA for ECC errors. for ( auto & mcaChip : portList ) { @@ -330,9 +286,6 @@ template<> uint32_t __analyzeCmdComplete<TYPE_MBA>( ExtensibleChip * i_chip, TdQueue & io_queue, TdRankListEntry & o_stoppedRank, - #ifndef __HOSTBOOT_RUNTIME - bool & o_broadcastMode, - #endif const MemAddr & i_addr, bool & o_errorsFound, STEP_CODE_DATA_STRUCT & io_sc ) @@ -371,9 +324,6 @@ uint32_t MemTdCtlr<T>::analyzeCmdComplete( bool & o_errorsFound, // Then, check for ECC errors, if they exist. 
o_rc = __analyzeCmdComplete<T>( iv_chip, iv_queue, iv_stoppedRank, - #ifndef __HOSTBOOT_RUNTIME - iv_broadcastMode, - #endif addr, o_errorsFound, io_sc ); if ( SUCCESS != o_rc ) { diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H index 8c0ea9df2..3f3f354fc 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -57,25 +57,23 @@ class MemTdCtlr * This contructor will only be called in the MCBIST or MBA data bundle, * which already checks for a valid type. * + * Need to initialize iv_stoppedRank to a valid entry in iv_rankList. Use + * the last entry in the list so that the 'next' rank is the first entry + * in the list. + * * @param i_chip An MCBIST or MBA chip. */ explicit MemTdCtlr( ExtensibleChip * i_chip ) : iv_chip( i_chip ), iv_rankList( i_chip ), - iv_stoppedRank( i_chip, MemRank(0) ) + iv_stoppedRank( iv_rankList.getList().back() ) { PRDF_ASSERT( T == iv_chip->getType() ); - if ( TARGETING::TYPE_MCBIST == i_chip->getType() ) - { - ExtensibleChip * mcaChip = - PlatServices::getConnectedChild(i_chip, TARGETING::TYPE_MCA, 0); - iv_stoppedRank = TdRankListEntry( mcaChip, MemRank(0) ); - } } /** * @brief Determines and executes the next course of action after a * maintenance command complete attention. - * @note Initializes the TD controller, if needed (runtime only). + * @note Initializes the TD controller, if needed. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -97,8 +95,8 @@ class MemTdCtlr * requests will be ignored. Any chip marks placed during this time will be * redetected when the runtime TD controller is initialized. 
* - * During IPL, this will simply add a new procedure to the queue, since we - * know TD will already be in progress when this is called. + * During MemDiags, this will simply add a new procedure to the queue, since + * we know a TD procedure will already be in progress when this is called. * * @note Initializes the TD controller, if needed. * @param io_sc The step code data struct. @@ -118,14 +116,23 @@ class MemTdCtlr /** * @brief Handles reset-reload or FO scenario. - * @note This function will check if PRD was unable to restart maintenance - * command before R/R or FO. In that scenario, this function will - * start maintenance command. As during R/R or F/O we do not have any - * mechanism to restore the complete state of TD controller, we will - * not start any interrupted or pending TD procedure. We will only - * start BG scrub. If we found any chip marks during TD state - * machine initialize we will start VCM procedure rather than - * BG scrub. + * + * This does not call initialize() or start any maintenance commands. + * Instead, it checks the hardware's current state and ensures by the end of + * the function that either a command is currently running or there will be + * a command complete attention pending that PRD will handle separately. + * + * If there is already an active command complete attention, this function + * does nothing because PRD will handle the attention soon. + * + * If there is no active command complete attention and there is no command + * currently in progress, it will set the command complete attention and PRD + * will handle that attention soon. + * + * Otherwise, there is a command in progress. So, it will check for any + * unverified chip marks. If any exist, it will force the current command to + * stop, causing a command complete attention that PRD will handle soon. + * * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
*/ uint32_t handleRrFo(); @@ -135,6 +142,24 @@ class MemTdCtlr private: /** + * @brief Initializes the TD controller, if needed. + * + * This should be called at the beginning of every public function to ensure + * the TD controller is initialized. + * + * During MemDiags, this initializes iv_broadcastModeCapable. + * + * At runtime, this is used to query hardware for any unverified chip marks + * that may have occurred after starting background scrubbing, but before + * PRD is up and running. We may also have unverified chip marks if the HBRT + * service is stopped and restarted (PRD is reinitialized and all previous + * state machine data is lost). + * + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + uint32_t initialize(); + + /** * @brief This is called when there are no more TD procedures to execute. * * During Memory Diagnostics, this means the current pattern test command @@ -198,8 +223,8 @@ class MemTdCtlr /** * @brief This is called when handling a command complete attention for a - * non-TD command to initialize iv_stoppedRank and iv_broadcastMode - * then check for any ECC errors. + * non-TD command to initialize iv_stoppedRank then check for any + * ECC errors. * @param o_errorsFound True if errors where found and handled. False * otherwise. * @param io_sc The step code data struct. @@ -237,22 +262,6 @@ class MemTdCtlr */ uint32_t unmaskEccAttns(); - /** - * @brief Initializes the TD controller, if needed. - * - * This is only supported during runtime. This is mostly useful at runtime - * to query hardware for any unverified chip marks. Those may occur after - * starting background scrubbing, but before PRD is up and running. We may - * also have unverified chip marks if the HBRT service is stopped and - * restarted (PRD is reinitialize and all previous state machine data is - * lost). - * - * @note Should be called at the beginning of every public function to - * ensure the TD controller is initialized. 
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - uint32_t initialize(); - #endif private: // instance variables @@ -276,11 +285,11 @@ class MemTdCtlr * diagnostics are complete. */ TdRankListEntry iv_stoppedRank; - #ifdef __HOSTBOOT_RUNTIME - /** True if the TD controller has been initialized. False otherwise. */ bool iv_initialized = false; + #ifdef __HOSTBOOT_RUNTIME + /** True if background scrubbing should be resumed after pausing on error. * False if a TD procedure had been executed and background scrubbing needs * to be restarted with a new command. */ @@ -288,9 +297,8 @@ class MemTdCtlr #else // IPL only - /** Combined with iv_stoppedRank. Indicates if the non-TD command that - * stopped was in broadcast mode or not. */ - bool iv_broadcastMode = false; + /** Indicates if broadcast mode is capable on iv_chip. */ + bool iv_broadcastModeCapable = false; #endif diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C index e6d769b6c..7caba811b 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -65,6 +65,32 @@ uint32_t MemTdCtlr<T>::handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc, //------------------------------------------------------------------------------ template <TARGETING::TYPE T> +uint32_t MemTdCtlr<T>::initialize() +{ + #define PRDF_FUNC "[MemTdCtlr::initialize] " + + uint32_t o_rc = SUCCESS; + + do + { + if ( iv_initialized ) break; // nothing to do + + // Check if broadcast mode is capable on this chip. + iv_broadcastModeCapable = isBroadcastModeCapable<T>( iv_chip ); + + // At this point, the TD controller is initialized. 
+ iv_initialized = true; + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template <TARGETING::TYPE T> uint32_t MemTdCtlr<T>::defaultStep( STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemTdCtlr::defaultStep] " @@ -72,7 +98,7 @@ uint32_t MemTdCtlr<T>::defaultStep( STEP_CODE_DATA_STRUCT & io_sc ) uint32_t o_rc = SUCCESS; TdRankListEntry nextRank = iv_rankList.getNext( iv_stoppedRank, - iv_broadcastMode ); + iv_broadcastModeCapable ); do { diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C index 3c34320b3..1fc61fba6 100644 --- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C +++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C @@ -54,6 +54,7 @@ namespace RDR // local utility functions to support PRDF::restoreDramRepairs() { // Creates and returns an error log. +template<TARGETING::TYPE T> errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_trgt, uint32_t i_signature ) { @@ -63,12 +64,18 @@ errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_trgt, // Note that the error log tags are not needed because PRD uses its own // signature parser. - return new ERRORLOG::ErrlEntry( + errlHndl_t errl = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, // severity PRDF_RESTORE_DRAM_REPAIR, // module ID i_reasonCode, // reason code userdata12, // user data 1 & 2 userdata34 ); // user data 3 & 4 + + // Add capture data. Need to do this now before the DIMM callouts are made + // because the VPD is cleared if a DIMM is added to the callout list. 
+ MemCaptureData::addEccData<T>( i_trgt, errl ); + + return errl; } //------------------------------------------------------------------------------ @@ -80,9 +87,6 @@ void commitErrl( errlHndl_t i_errl, TargetHandle_t i_trgt ) { if ( NULL != i_errl ) { - // Add capture data - MemCaptureData::addEccData<T>( i_trgt, i_errl ); - // Add traces i_errl->collectTrace( PRDF_COMP_NAME, 512 ); @@ -147,7 +151,7 @@ void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_trgt, { if ( i_analysisErrors ) { - errlHndl_t errl = createErrl( i_reasonCode, i_trgt, i_signature ); + errlHndl_t errl = createErrl<T>( i_reasonCode, i_trgt, i_signature ); errl->addProcedureCallout( HWAS::EPUB_PRC_LVL_SUPP, HWAS::SRCI_PRIORITY_HIGH ); commitErrl<T>( errl, i_trgt ); @@ -225,8 +229,9 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, if ( NULL == errl ) { - errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_trgt, - PRDFSIG_RdrRepairsUsed ); + errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE, + i_trgt, + PRDFSIG_RdrRepairsUsed ); } std::vector<MemSymbol> symList; @@ -344,8 +349,9 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, if ( NULL == errl ) { - errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba, - PRDFSIG_RdrRepairsUsed ); + errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE, + i_mba, + PRDFSIG_RdrRepairsUsed ); } std::vector<CenSymbol> list; @@ -425,8 +431,8 @@ bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) { if ( NULL == errl ) { - errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_trgt, - PRDFSIG_RdrRepairUnavail ); + errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE, + i_trgt, PRDFSIG_RdrRepairUnavail ); } __calloutDimm<TYPE_MCA, DIMMS_PER_RANK::MCA>( errl, i_trgt, dimm ); @@ -492,8 +498,8 @@ bool processBadDimms<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) { if ( NULL == errl ) { - errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba, - PRDFSIG_RdrRepairUnavail ); + errl = 
createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE, i_mba, + PRDFSIG_RdrRepairUnavail ); } o_calloutMade = true; |