summaryrefslogtreecommitdiffstats
path: root/src/usr/diag/prdf/plat/mem
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2018-02-21 21:10:58 -0600
committerZane C. Shelley <zshelle@us.ibm.com>2018-02-23 11:14:41 -0500
commit5324435b6d271ec3f4f387109f13d84e1e0c2c2b (patch)
treea327e02f4b72a290abc83f711efca2444ae826b1 /src/usr/diag/prdf/plat/mem
parenta681d519d4dc3d01b14df22ffa79c4e6bcfdec96 (diff)
downloadtalos-hostboot-5324435b6d271ec3f4f387109f13d84e1e0c2c2b.tar.gz
talos-hostboot-5324435b6d271ec3f4f387109f13d84e1e0c2c2b.zip
PRD: initializing MemTdCtlr variables for broadcast mode
Change-Id: I4982ef4c27f694e35471cad3e3ec57eedfa2632e
CQ: SW418155
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/54548
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf/plat/mem')
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C58
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H92
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C30
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C32
4 files changed, 101 insertions, 111 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
index c78e71571..ba1eadd83 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
@@ -86,8 +86,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
do
{
- #ifdef __HOSTBOOT_RUNTIME
-
// Make sure the TD controller is initialized.
o_rc = initialize();
if ( SUCCESS != o_rc )
@@ -96,7 +94,7 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
break;
}
- #else // IPL only
+ #ifndef __HOSTBOOT_RUNTIME // IPL only
// TODO: RTC 179251 asserting here doesn't give us enough FFDC to debug
// why we got this erroneous attention. Eventually, we will want
@@ -142,36 +140,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
// informational error logs.
if ( !errorsFound ) io_sc.service_data->setDontCommitErrl();
}
- else
- {
- // Make sure iv_stoppedRank still gets updated.
- std::vector<ExtensibleChip *> portList;
- o_rc = getMcbistMaintPort( iv_chip, portList );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "getMcbistMaintPort(0x%08x) failed",
- iv_chip->getHuid() );
- break;
- }
-
- // In broadcast mode, the rank configuration for all ports will be
- // the same. In non-broadcast mode, there will only be one MCA in
- // the list. Therefore, we can simply use the first MCA in the list
- // for all configs.
- ExtensibleChip * stopChip = portList.front();
-
- // Get the address in which the command stopped.
- MemAddr addr;
- o_rc = getMemMaintAddr<T>( iv_chip, addr );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "getMemMaintAddr<T>(0x%08x) failed",
- iv_chip->getHuid() );
- break;
- }
-
- iv_stoppedRank = __getStopRank<TYPE_MCA>( stopChip, addr );
- }
// Move onto the next step in the state machine.
o_rc = nextStep( io_sc );
@@ -188,7 +156,10 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
// successfully with no errors because the error log will not be
// committed.
if ( !io_sc.service_data->queryDontCommitErrl() )
+ {
+ collectStateCaptureData( io_sc, TD_CTLR_DATA::END );
MemCaptureData::addEccData<T>( iv_chip, io_sc );
+ }
if ( SUCCESS != o_rc )
{
@@ -219,10 +190,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
#endif
}
- else
- {
- collectStateCaptureData( io_sc, TD_CTLR_DATA::END );
- }
return o_rc;
@@ -249,9 +216,6 @@ template<TARGETING::TYPE T>
uint32_t __analyzeCmdComplete( ExtensibleChip * i_chip,
TdQueue & io_queue,
TdRankListEntry & o_stoppedRank,
- #ifndef __HOSTBOOT_RUNTIME
- bool & o_broadcastMode,
- #endif
const MemAddr & i_addr,
bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc );
@@ -260,9 +224,6 @@ template<>
uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip,
TdQueue & io_queue,
TdRankListEntry & o_stoppedRank,
- #ifndef __HOSTBOOT_RUNTIME
- bool & o_broadcastMode,
- #endif
const MemAddr & i_addr,
bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc )
@@ -294,11 +255,6 @@ uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip,
// Update iv_stoppedRank.
o_stoppedRank = __getStopRank<TYPE_MCA>( stopChip, i_addr );
- #ifndef __HOSTBOOT_RUNTIME
- // Update iv_broadcastMode.
- o_broadcastMode = ( 1 < portList.size() );
- #endif
-
// Check each MCA for ECC errors.
for ( auto & mcaChip : portList )
{
@@ -330,9 +286,6 @@ template<>
uint32_t __analyzeCmdComplete<TYPE_MBA>( ExtensibleChip * i_chip,
TdQueue & io_queue,
TdRankListEntry & o_stoppedRank,
- #ifndef __HOSTBOOT_RUNTIME
- bool & o_broadcastMode,
- #endif
const MemAddr & i_addr,
bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc )
@@ -371,9 +324,6 @@ uint32_t MemTdCtlr<T>::analyzeCmdComplete( bool & o_errorsFound,
// Then, check for ECC errors, if they exist.
o_rc = __analyzeCmdComplete<T>( iv_chip, iv_queue, iv_stoppedRank,
- #ifndef __HOSTBOOT_RUNTIME
- iv_broadcastMode,
- #endif
addr, o_errorsFound, io_sc );
if ( SUCCESS != o_rc )
{
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
index 8c0ea9df2..3f3f354fc 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016,2017 */
+/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -57,25 +57,23 @@ class MemTdCtlr
* This contructor will only be called in the MCBIST or MBA data bundle,
* which already checks for a valid type.
*
+ * Need to initialize iv_stoppedRank to a valid entry in iv_rankList. Use
+ * the last entry in the list so that the 'next' rank is the first entry
+ * in the list.
+ *
* @param i_chip An MCBIST or MBA chip.
*/
explicit MemTdCtlr( ExtensibleChip * i_chip ) :
iv_chip( i_chip ), iv_rankList( i_chip ),
- iv_stoppedRank( i_chip, MemRank(0) )
+ iv_stoppedRank( iv_rankList.getList().back() )
{
PRDF_ASSERT( T == iv_chip->getType() );
- if ( TARGETING::TYPE_MCBIST == i_chip->getType() )
- {
- ExtensibleChip * mcaChip =
- PlatServices::getConnectedChild(i_chip, TARGETING::TYPE_MCA, 0);
- iv_stoppedRank = TdRankListEntry( mcaChip, MemRank(0) );
- }
}
/**
* @brief Determines and executes the next course of action after a
* maintenance command complete attention.
- * @note Initializes the TD controller, if needed (runtime only).
+ * @note Initializes the TD controller, if needed.
* @param io_sc The step code data struct.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
@@ -97,8 +95,8 @@ class MemTdCtlr
* requests will be ignored. Any chip marks placed during this time will be
* redetected when the runtime TD controller is initialized.
*
- * During IPL, this will simply add a new procedure to the queue, since we
- * know TD will already be in progress when this is called.
+ * During MemDiags, this will simply add a new procedure to the queue, since
+ * we know a TD procedure will already be in progress when this is called.
*
* @note Initializes the TD controller, if needed.
* @param io_sc The step code data struct.
@@ -118,14 +116,23 @@ class MemTdCtlr
/**
* @brief Handles reset-reload or FO scenario.
- * @note This function will check if PRD was unable to restart maintenance
- * command before R/R or FO. In that scenario, this function will
- * start maintenance command. As during R/R or F/O we do not have any
- * mechanism to restore the complete state of TD controller, we will
- * not start any interrupted or pending TD procedure. We will only
- * start BG scrub. If we found any chip marks during TD state
- * machine initialize we will start VCM procedure rather than
- * BG scrub.
+ *
+ * This does not call initialize() or start any maintenance commands.
+ * Instead, it checks the hardware's current state and ensures by the end of
+ * the function that either a command is currently running or there will be
+ * a command complete attention pending that PRD will handle separately.
+ *
+ * If there is already an active command complete attention, this function
+ * does nothing because PRD will handle the attention soon.
+ *
+ * If there is no active command complete attention and there is no command
+ * currently in progress, it will set the command complete attention and PRD
+ * will handle that attention soon.
+ *
+ * Otherwise, there is a command in progress. So, it will check for any
+ * unverified chip marks. If any exist, it will force the current command to
+ * stop, causing a command complete attention that PRD will handle soon.
+ *
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
uint32_t handleRrFo();
@@ -135,6 +142,24 @@ class MemTdCtlr
private:
/**
+ * @brief Initializes the TD controller, if needed.
+ *
+ * This should be called at the beginning of every public function to ensure
+ * the TD controller is initialized.
+ *
+ * During MemDiags, this initializes iv_broadcastModeCapable.
+ *
+ * At runtime, this is used to query hardware for any unverified chip marks
+ * that may have occurred after starting background scrubbing, but before
+ * PRD is up and running. We may also have unverified chip marks if the HBRT
+ * service is stopped and restarted (PRD is reinitialized and all previous
+ * state machine data is lost).
+ *
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ uint32_t initialize();
+
+ /**
* @brief This is called when there are no more TD procedures to execute.
*
* During Memory Diagnostics, this means the current pattern test command
@@ -198,8 +223,8 @@ class MemTdCtlr
/**
* @brief This is called when handling a command complete attention for a
- * non-TD command to initialize iv_stoppedRank and iv_broadcastMode
- * then check for any ECC errors.
+ * non-TD command to initialize iv_stoppedRank then check for any
+ * ECC errors.
* @param o_errorsFound True if errors where found and handled. False
* otherwise.
* @param io_sc The step code data struct.
@@ -237,22 +262,6 @@ class MemTdCtlr
*/
uint32_t unmaskEccAttns();
- /**
- * @brief Initializes the TD controller, if needed.
- *
- * This is only supported during runtime. This is mostly useful at runtime
- * to query hardware for any unverified chip marks. Those may occur after
- * starting background scrubbing, but before PRD is up and running. We may
- * also have unverified chip marks if the HBRT service is stopped and
- * restarted (PRD is reinitialize and all previous state machine data is
- * lost).
- *
- * @note Should be called at the beginning of every public function to
- * ensure the TD controller is initialized.
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
- */
- uint32_t initialize();
-
#endif
private: // instance variables
@@ -276,11 +285,11 @@ class MemTdCtlr
* diagnostics are complete. */
TdRankListEntry iv_stoppedRank;
- #ifdef __HOSTBOOT_RUNTIME
-
/** True if the TD controller has been initialized. False otherwise. */
bool iv_initialized = false;
+ #ifdef __HOSTBOOT_RUNTIME
+
/** True if background scrubbing should be resumed after pausing on error.
* False if a TD procedure had been executed and background scrubbing needs
* to be restarted with a new command. */
@@ -288,9 +297,8 @@ class MemTdCtlr
#else // IPL only
- /** Combined with iv_stoppedRank. Indicates if the non-TD command that
- * stopped was in broadcast mode or not. */
- bool iv_broadcastMode = false;
+ /** Indicates if iv_chip is capable of broadcast mode. */
+ bool iv_broadcastModeCapable = false;
#endif
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
index e6d769b6c..7caba811b 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016,2017 */
+/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -65,6 +65,32 @@ uint32_t MemTdCtlr<T>::handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc,
//------------------------------------------------------------------------------
template <TARGETING::TYPE T>
+uint32_t MemTdCtlr<T>::initialize()
+{
+ #define PRDF_FUNC "[MemTdCtlr::initialize] "
+
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ if ( iv_initialized ) break; // nothing to do
+
+ // Check if this chip is capable of broadcast mode.
+ iv_broadcastModeCapable = isBroadcastModeCapable<T>( iv_chip );
+
+ // At this point, the TD controller is initialized.
+ iv_initialized = true;
+
+ } while (0);
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+template <TARGETING::TYPE T>
uint32_t MemTdCtlr<T>::defaultStep( STEP_CODE_DATA_STRUCT & io_sc )
{
#define PRDF_FUNC "[MemTdCtlr::defaultStep] "
@@ -72,7 +98,7 @@ uint32_t MemTdCtlr<T>::defaultStep( STEP_CODE_DATA_STRUCT & io_sc )
uint32_t o_rc = SUCCESS;
TdRankListEntry nextRank = iv_rankList.getNext( iv_stoppedRank,
- iv_broadcastMode );
+ iv_broadcastModeCapable );
do
{
diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
index 3c34320b3..1fc61fba6 100644
--- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
+++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
@@ -54,6 +54,7 @@ namespace RDR // local utility functions to support PRDF::restoreDramRepairs()
{
// Creates and returns an error log.
+template<TARGETING::TYPE T>
errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_trgt,
uint32_t i_signature )
{
@@ -63,12 +64,18 @@ errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_trgt,
// Note that the error log tags are not needed because PRD uses its own
// signature parser.
- return new ERRORLOG::ErrlEntry(
+ errlHndl_t errl = new ERRORLOG::ErrlEntry(
ERRORLOG::ERRL_SEV_PREDICTIVE, // severity
PRDF_RESTORE_DRAM_REPAIR, // module ID
i_reasonCode, // reason code
userdata12, // user data 1 & 2
userdata34 ); // user data 3 & 4
+
+ // Add capture data. Need to do this now before the DIMM callouts are made
+ // because the VPD is cleared if a DIMM is added to the callout list.
+ MemCaptureData::addEccData<T>( i_trgt, errl );
+
+ return errl;
}
//------------------------------------------------------------------------------
@@ -80,9 +87,6 @@ void commitErrl( errlHndl_t i_errl, TargetHandle_t i_trgt )
{
if ( NULL != i_errl )
{
- // Add capture data
- MemCaptureData::addEccData<T>( i_trgt, i_errl );
-
// Add traces
i_errl->collectTrace( PRDF_COMP_NAME, 512 );
@@ -147,7 +151,7 @@ void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_trgt,
{
if ( i_analysisErrors )
{
- errlHndl_t errl = createErrl( i_reasonCode, i_trgt, i_signature );
+ errlHndl_t errl = createErrl<T>( i_reasonCode, i_trgt, i_signature );
errl->addProcedureCallout( HWAS::EPUB_PRC_LVL_SUPP,
HWAS::SRCI_PRIORITY_HIGH );
commitErrl<T>( errl, i_trgt );
@@ -225,8 +229,9 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
if ( NULL == errl )
{
- errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_trgt,
- PRDFSIG_RdrRepairsUsed );
+ errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE,
+ i_trgt,
+ PRDFSIG_RdrRepairsUsed );
}
std::vector<MemSymbol> symList;
@@ -344,8 +349,9 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
if ( NULL == errl )
{
- errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
- PRDFSIG_RdrRepairsUsed );
+ errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE,
+ i_mba,
+ PRDFSIG_RdrRepairsUsed );
}
std::vector<CenSymbol> list;
@@ -425,8 +431,8 @@ bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask )
{
if ( NULL == errl )
{
- errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_trgt,
- PRDFSIG_RdrRepairUnavail );
+ errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE,
+ i_trgt, PRDFSIG_RdrRepairUnavail );
}
__calloutDimm<TYPE_MCA, DIMMS_PER_RANK::MCA>( errl, i_trgt, dimm );
@@ -492,8 +498,8 @@ bool processBadDimms<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask )
{
if ( NULL == errl )
{
- errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
- PRDFSIG_RdrRepairUnavail );
+ errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
+ PRDFSIG_RdrRepairUnavail );
}
o_calloutMade = true;
OpenPOWER on IntegriCloud