1 files changed, 755 insertions, 80 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
index 5f7efa274..fac29fce3 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
@@ -27,7 +27,6 @@
 #include <iipServiceDataCollector.h>
 #include <prdfExtensibleChip.H>
 #include <prdfPluginMap.H>
-#include <isteps/nvdimm/nvdimm.H>
 
 // Platform includes
 #include <prdfMemDbUtils.H>
@@ -38,6 +37,10 @@
   #include <prdfMemTps.H>
 #endif
 
+#ifdef CONFIG_NVDIMM
+    #include <nvdimm.H>
+#endif
+
 using namespace TARGETING;
 
 namespace PRDF
@@ -296,18 +299,9 @@ PRDF_PLUGIN_DEFINE( nimbus_mca, MemPortFailure );
 //
 //##############################################################################
 
+#ifdef CONFIG_NVDIMM
 #ifdef __HOSTBOOT_RUNTIME
 
-enum nvdimmRegOffset
-{
-    NVDIMM_MGT_CMD1          = 0x041,
-    MODULE_HEALTH            = 0x0A0,
-    MODULE_HEALTH_STATUS0    = 0x0A1,
-    MODULE_HEALTH_STATUS1    = 0x0A2,
-    ERROR_THRESHOLD_STATUS   = 0x0A5,
-    WARNING_THRESHOLD_STATUS = 0x0A7,
-};
-
 /**
  * @brief  Gets a map list of which bits are set from a uint8_t bit list (7:0)
  * @param  i_data uint8_t bit list (7:0)
@@ -349,6 +343,7 @@ uint32_t __addBpmCallout( TargetHandle_t i_dimm,
             break;
         }
 
+        // addPartCallout will default to GARD_NULL, NO_DECONFIG
         mainErrl->addPartCallout( i_dimm, HWAS::BPM_PART_TYPE,
                                   i_priority );
 
@@ -362,10 +357,12 @@ uint32_t __addBpmCallout( TargetHandle_t i_dimm,
 /**
  * @brief  Adds a callout of the cable connecting an NVDIMM to its
  *         backup power module (BPM)
+ * @param  i_dimm     The target dimm.
  * @param  i_priority The callout priority.
  * @return FAIL if unable to get the global error log, else SUCCESS
  */
-uint32_t __addNvdimmCableCallout( HWAS::callOutPriority i_priority )
+uint32_t __addNvdimmCableCallout( TargetHandle_t i_dimm,
+                                  HWAS::callOutPriority i_priority )
 {
     #define PRDF_FUNC "[__addNvdimmCableCallout] "
 
@@ -382,7 +379,9 @@ uint32_t __addNvdimmCableCallout( HWAS::callOutPriority i_priority )
             break;
         }
 
-        mainErrl->addProcedureCallout( HWAS::EPUB_PRC_NVDIMM_ERR, i_priority );
+        // addPartCallout will default to GARD_NULL, NO_DECONFIG
+        mainErrl->addPartCallout( i_dimm, HWAS::BPM_CABLE_PART_TYPE,
+                                  i_priority );
 
     }while(0);
 
@@ -391,21 +390,45 @@ uint32_t __addNvdimmCableCallout( HWAS::callOutPriority i_priority )
     #undef PRDF_FUNC
 }
 
+/**
+ * @brief  Records a signature: the first error found sets the primary
+ *         signature, any later one is added to the multi-signature list.
+ * @param  io_sc      The step code data struct.
+ * @param  i_trgt     The target the signature is reported against.
+ * @param  i_errFound True if an error has already been found, else false.
+ * @param  i_sig      The signature to record.
+ */
+void __addSignature( STEP_CODE_DATA_STRUCT & io_sc, TargetHandle_t i_trgt,
+                     bool i_errFound, uint32_t i_sig )
+{
+    if ( !i_errFound )
+    {
+        // Nothing reported yet, so this becomes the primary signature.
+        io_sc.service_data->setSignature( getHuid(i_trgt), i_sig );
+        return;
+    }
+
+    io_sc.service_data->AddSignatureList( i_trgt, i_sig );
+}
 
 /**
  * @brief  Analyze NVDIMM Health Status0 Register for errors
- * @param  io_sc  The step code data struct.
- * @param  i_dimm The target dimm.
+ * @param  io_sc       The step code data struct.
+ * @param  i_dimm      The target dimm.
+ * @param  io_errFound Whether an error has already been found or not.
  * @return FAIL if unable to read register, else SUCCESS
  */
-uint32_t __analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc,
-                                    TargetHandle_t i_dimm )
+uint32_t __analyzeHealthStatus0Reg(STEP_CODE_DATA_STRUCT & io_sc,
+                                   TargetHandle_t i_dimm, bool & io_errFound)
 {
     #define PRDF_FUNC "[__analyzeHealthStatus0Reg] "
 
     uint32_t o_rc = SUCCESS;
     uint8_t data = 0;
 
+    // Get MCA, for signatures
+    TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA );
+
     do
     {
         // NVDIMM health status registers size = 1 byte
@@ -413,7 +436,7 @@ uint32_t __analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc,
 
         // Read the Health Status0 Register (0xA1) 7:0
         errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE,
-            DEVICE_NVDIMM_ADDRESS(MODULE_HEALTH_STATUS0) );
+            DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::MODULE_HEALTH_STATUS0) );
         if ( errl )
         {
             PRDF_ERR( PRDF_FUNC "Failed to read Health Status0 Register. "
@@ -427,58 +450,66 @@ uint32_t __analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc,
         // BIT 0: Voltage Regulator Fail
         if ( bitList.count(0) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VoltRegFail );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_VoltRegFail );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 1: VDD Lost
         if ( bitList.count(1) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VddLost );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_VddLost );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 2: VPP Lost
         if ( bitList.count(2) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VppLost );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_VppLost );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 3: VTT Lost
         if ( bitList.count(3) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_VttLost );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_VttLost );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 4: DRAM not Self Refresh
         if ( bitList.count(4) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_NotSelfRefr );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_NotSelfRefr );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 5: Controller HW Error
         if ( bitList.count(5) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_CtrlHwErr );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_CtrlHwErr );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 6: NVM Controller Error
         if ( bitList.count(6) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_NvmCtrlErr );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmCtrlErr );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 7: NVM Lifetime Error
         if ( bitList.count(7) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_NvmLifeErr );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmLifeErr );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
 
     }while(0);
@@ -491,18 +522,22 @@ uint32_t __analyzeHealthStatus0Reg( STEP_CODE_DATA_STRUCT & io_sc,
 
 /**
  * @brief  Analyze NVDIMM Health Status1 Register for errors
- * @param  io_sc  The step code data struct.
- * @param  i_dimm The target dimm.
+ * @param  io_sc       The step code data struct.
+ * @param  i_dimm      The target dimm.
+ * @param  io_errFound Whether an error has already been found or not.
  * @return FAIL if unable to read register, else SUCCESS
  */
 uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc,
-                                    TargetHandle_t i_dimm )
+                                    TargetHandle_t i_dimm, bool & io_errFound )
 {
     #define PRDF_FUNC "[__analyzeHealthStatus1Reg] "
 
     uint32_t o_rc = SUCCESS;
     uint8_t data = 0;
 
+    // Get MCA, for signatures
+    TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA );
+
     do
     {
         // NVDIMM health status registers size = 1 byte
@@ -510,7 +545,7 @@ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc,
 
         // Read the Health Status1 Register (0xA2) 7:0
         errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE,
-            DEVICE_NVDIMM_ADDRESS(MODULE_HEALTH_STATUS1) );
+            DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::MODULE_HEALTH_STATUS1) );
         if ( errl )
         {
             PRDF_ERR( PRDF_FUNC "Failed to read Health Status1 Register. "
@@ -524,83 +559,90 @@ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc,
         // BIT 0: Insufficient Energy
         if ( bitList.count(0) )
         {
-            io_sc.service_data->AddSignatureList(i_dimm, PRDFSIG_InsuffEnergy);
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_InsuffEnergy );
 
             // Callout BPM (backup power module) high, cable high
             o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
-            o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH );
+            o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
 
             // Callout NVDIMM low, no gard
             io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+            io_errFound = true;
         }
         // BIT 1: Invalid Firmware
         if ( bitList.count(1) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_InvFwErr );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_InvFwErr );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 2: Configuration Data Error
         if ( bitList.count(2) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_CnfgDataErr );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_CnfgDataErr );
             // Callout NVDIMM on 1st, no gard
-            io_sc.service_data->SetCallout( i_dimm, MRU_HIGH, NO_GARD );
+            io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            io_errFound = true;
         }
         // BIT 3: No Energy Source
         if ( bitList.count(3) )
         {
-            io_sc.service_data->AddSignatureList(i_dimm, PRDFSIG_NoEsPres);
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_NoEsPres );
 
             // Callout BPM (backup power module) high, cable high
             o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
-            o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH );
+            o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
 
             // Callout NVDIMM low, no gard
             io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+            io_errFound = true;
         }
         // BIT 4: Energy Policy Not Set
         if ( bitList.count(4) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_EsPolNotSet );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsPolNotSet );
 
             // Callout FW (Level2 Support) High
             io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_HIGH, NO_GARD );
 
             // Callout NVDIMM low on 1st, no gard
             io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+            io_errFound = true;
         }
         // BIT 5: Energy Source HW Error
         if ( bitList.count(5) )
         {
-            io_sc.service_data->AddSignatureList ( i_dimm, PRDFSIG_EsHwFail );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsHwFail );
 
             // Callout BPM (backup power module) high, cable high
             o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
-            o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH );
+            o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
 
             // Callout NVDIMM low, no gard
             io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+            io_errFound = true;
         }
         // BIT 6: Energy Source Health Assessment Error
         if ( bitList.count(6) )
         {
-            io_sc.service_data->AddSignatureList(i_dimm, PRDFSIG_EsHlthAssess);
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsHlthAssess);
 
             // Callout BPM (backup power module) high, cable high
             o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
-            o_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH );
+            o_rc = __addNvdimmCableCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != o_rc ) break;
 
             // Callout NVDIMM low, no gard
             io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+            io_errFound = true;
         }
         // BIT 7: Reserved
 
@@ -613,18 +655,105 @@ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc,
 }
 
 /**
+ * @brief  Reads and merges the data from two ES_TEMP registers to get the
+ *         correct temperature format.
+ * @param  i_dimm       The target nvdimm.
+ * @param  i_tempMsbReg The address of the register that contains the most
+ *                      significant byte of the temperature data.
+ * @param  i_tempLsbReg The address of the register that contains the least
+ *                      significant byte of the temperature data.
+ * @param  o_tempData   The 16 bit temperature data (0 if a read fails).
+ * @return FAIL if unable to read register, else SUCCESS
+ */
+uint32_t __readTemp( TargetHandle_t i_dimm, uint16_t i_tempMsbReg,
+                     uint16_t i_tempLsbReg, uint16_t & o_tempData )
+{
+    #define PRDF_FUNC "[__readTemp] "
+
+    /*
+     * -NOTE: Example showing how to read the temperature format:
+     * ES_TEMP1  = 0x03 (MSB: bits 15-8)
+     * ES_TEMP0  = 0x48 (LSB: bits 7-0)
+     *
+     * 0x0348 = 0000 0011 0100 1000 = 52.5 C
+     *
+     * -NOTE: bit definition:
+     * [15:13]Reserved
+     * [12]Sign 0 = positive, 1 = negative; 0°C should be expressed as positive
+     * [11]  128°C
+     * [10]   64°C
+     * [9]    32°C
+     * [8]    16°C
+     * [7]     8°C
+     * [6]     4°C
+     * [5]     2°C
+     * [4]     1°C
+     * [3]   0.5°C
+     * [2]  0.25°C
+     * [1] 0.125°C Optional for temp fields; not used for temp th fields
+     * [0]0.0625°C Optional for temp fields; not used for temp th fields
+     */
+    uint32_t o_rc = SUCCESS;
+    o_tempData = 0; // keep the output defined on every failure path
+
+    do
+    {
+        // Each temperature register is read as a single byte.
+        size_t NVDIMM_SIZE = 1;
+        uint8_t msbData = 0;
+        uint8_t lsbData = 0;
+
+        // Read the two inputted temperature registers.
+        errlHndl_t errl = deviceRead( i_dimm, &msbData, NVDIMM_SIZE,
+                                      DEVICE_NVDIMM_ADDRESS(i_tempMsbReg) );
+        if ( errl )
+        {
+            PRDF_ERR( PRDF_FUNC "Failed to read ES Temperature MSB Register. "
+                      "HUID: 0x%08x", getHuid(i_dimm) );
+            PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+            o_rc = FAIL;
+            break;
+        }
+
+        errl = deviceRead( i_dimm, &lsbData, NVDIMM_SIZE,
+                           DEVICE_NVDIMM_ADDRESS(i_tempLsbReg) );
+        if ( errl )
+        {
+            PRDF_ERR( PRDF_FUNC "Failed to read ES Temperature LSB Register. "
+                      "HUID: 0x%08x", getHuid(i_dimm) );
+            PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+            o_rc = FAIL;
+            break;
+        }
+
+        o_tempData = static_cast<uint16_t>( (msbData << 8) | lsbData );
+    }while(0);
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+/**
  * @brief  Analyze NVDIMM Error Threshold Status Register for errors
- * @param  io_sc  The step code data struct.
- * @param  i_dimm The target dimm.
+ * @param  io_sc       The step code data struct.
+ * @param  i_dimm      The target dimm.
+ * @param  io_errFound Whether an error has already been found or not.
+ * @param  o_esTempErr A flag for whether we hit an ES TEMP error or not.
  * @return FAIL if unable to read register, else SUCCESS
  */
 uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc,
-                                     TargetHandle_t i_dimm )
+                                     TargetHandle_t i_dimm, bool & io_errFound,
+                                     bool & o_esTempErr )
 {
     #define PRDF_FUNC "[__analyzeErrorThrStatusReg] "
 
     uint32_t o_rc = SUCCESS;
     uint8_t data = 0;
+    o_esTempErr = false;
+
+    // Get MCA, for signatures
+    TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA );
 
     do
     {
@@ -633,7 +762,7 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc,
 
         // Read the Error Threshold Status Register (0xA5) 7:0
         errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE,
-            DEVICE_NVDIMM_ADDRESS(ERROR_THRESHOLD_STATUS) );
+            DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::ERROR_THRESHOLD_STATUS) );
         if ( errl )
         {
             PRDF_ERR( PRDF_FUNC "Failed to read Error Threshold Status Reg. "
@@ -648,7 +777,7 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc,
         // BIT 1: ES Lifetime Error
         if ( bitList.count(1) )
         {
-            io_sc.service_data->AddSignatureList ( i_dimm, PRDFSIG_EsLifeErr );
+            __addSignature( io_sc, mca, io_errFound, PRDFSIG_EsLifeErr );
 
             // Callout BPM (backup power module) high
             o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
@@ -656,11 +785,60 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc,
 
             // Callout NVDIMM low, no gard
             io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+            io_errFound = true;
         }
         // BIT 2: ES Temperature Error
         if ( bitList.count(2) )
         {
-            io_sc.service_data->AddSignatureList( i_dimm, PRDFSIG_EsTmpErr );
+            // Sleep two seconds to avoid exiting PRD analysis faster than the
+            // ES_TEMP sample rate.
+            PlatServices::milliSleep( 2, 0 );
+
+            // Read ES_TEMP and the ES_TEMP_ERROR_{HIGH,LOW}_THRESHOLD values
+            uint16_t msbEsTempReg = NVDIMM::i2cReg::ES_TEMP1;
+            uint16_t lsbEsTempReg = NVDIMM::i2cReg::ES_TEMP0;
+            uint16_t esTemp = 0;
+            o_rc = __readTemp( i_dimm, msbEsTempReg, lsbEsTempReg, esTemp );
+            if ( SUCCESS != o_rc ) break;
+
+            uint16_t msbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_HIGH_THRESHOLD1;
+            uint16_t lsbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_HIGH_THRESHOLD0;
+            uint16_t esTempHighTh = 0;
+            o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempHighTh );
+            if ( SUCCESS != o_rc ) break;
+
+            msbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_LOW_THRESHOLD1;
+            lsbThReg = NVDIMM::i2cReg::ES_TEMP_ERROR_LOW_THRESHOLD0;
+            uint16_t esTempLowTh = 0;
+            o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempLowTh );
+            if ( SUCCESS != o_rc ) break;
+
+            // Check to see if the ES_TEMP is negative (bit 12)
+            bool esTempNeg = false;
+            if ( esTemp & 0x1000 ) esTempNeg = true;
+
+            // If ES_TEMP is equal or above ES_TEMP_ERROR_HIGH_THRESHOLD
+            // Just in case ES_TEMP has moved before we read it out, we'll add
+            // a 2°C margin when comparing to the threshold.
+            if ( (esTemp >= (esTempHighTh - 0x0020)) && !esTempNeg )
+            {
+                __addSignature( io_sc, mca, io_errFound,
+                                PRDFSIG_EsTmpErrHigh );
+            }
+            // Else check if the error hit the low threshold, again with the
+            // same 2°C margin.
+            else if ( (esTemp <= (esTempLowTh + 0x0020)) || esTempNeg )
+            {
+                __addSignature( io_sc, mca, io_errFound,
+                                PRDFSIG_EsTmpErrLow );
+            }
+            // Else the temperature must have gone back to a normal value, so
+            // we will label this as a false alarm case.
+            else
+            {
+                __addSignature( io_sc, mca, io_errFound,
+                                PRDFSIG_EsTmpErrFa );
+            }
 
             // Callout BPM (backup power module) high
             o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
@@ -668,6 +846,9 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc,
 
             // Callout NVDIMM low, no gard
             io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+
+            o_esTempErr = true;
+            io_errFound = true;
         }
         // BIT 3:7: Reserved
 
@@ -680,6 +861,419 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc,
 }
 
 /**
+ * @brief  Adjusts the warning threshold so that future warnings are allowed
+ *         to report.
+ * @param  io_sc       The step code data struct.
+ * @param  i_dimm      The target nvdimm.
+ * @param  i_warnThReg The address of the relevant warning threshold register.
+ * @param  i_errThReg  The address of the relevant error threshold register.
+ * @param  o_firstWarn Flag if this is the first warning of this type.
+ * @param  o_statusErr Flag to tell if we found an error from checking the
+ *                     notification status register.
+ * @return Non-SUCCESS if a register access or PHYP notify fails, else SUCCESS
+ */
+uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
+                            TargetHandle_t i_dimm, uint16_t i_warnThReg,
+                            uint16_t i_errThReg, bool & o_firstWarn,
+                            bool & o_statusErr )
+{
+    #define PRDF_FUNC "[__adjustThreshold] "
+
+    uint32_t o_rc = SUCCESS;
+    uint16_t notifCmdReg    = NVDIMM::i2cReg::SET_EVENT_NOTIFICATION_CMD;
+    uint16_t notifStatusReg = NVDIMM::i2cReg::SET_EVENT_NOTIFICATION_STATUS;
+    o_firstWarn = false;
+    o_statusErr = false;
+
+    do
+    {
+        // Each register access below is issued with a 1 byte length.
+        size_t NVDIMM_SIZE = 1;
+
+        // Read the corresponding warning threshold
+        uint8_t warnTh = 0;
+        errlHndl_t errl = deviceRead( i_dimm, &warnTh, NVDIMM_SIZE,
+                                      DEVICE_NVDIMM_ADDRESS(i_warnThReg) );
+        if ( errl )
+        {
+            PRDF_ERR( PRDF_FUNC "Failed to read Warning Threshold Reg. HUID: "
+                      "0x%08x", getHuid(i_dimm) );
+            PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+            o_rc = FAIL;
+            break;
+        }
+
+        // Read the corresponding error threshold
+        uint8_t errTh = 0;
+        errl = deviceRead( i_dimm, &errTh, NVDIMM_SIZE,
+                           DEVICE_NVDIMM_ADDRESS(i_errThReg) );
+        if ( errl )
+        {
+            PRDF_ERR( PRDF_FUNC "Failed to read Error Threshold Reg. HUID: "
+                      "0x%08x", getHuid(i_dimm) );
+            PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+            o_rc = FAIL;
+            break;
+        }
+
+        // Move the threshold unless warnTh is already errTh+1. NOTE(review):
+        // errTh+1 is an int here, so errTh == 0xFF never matches - confirm.
+        if ( warnTh != (errTh+1) )
+        {
+            o_firstWarn = true;
+
+            // SET_EVENT_NOTIFICATION_CMD is a write only register that is
+            // used to change the SET_EVENT_NOTIFICATION_STATUS register.
+            // The only bits within it that are used are bits 0 and 1, as such
+            // we can safely set the rest to 0. It is defined as:
+            // [0]:   Persistency Notification
+            // [1]:   Warning Threshold Notification
+            // [2]:   Obsolete
+            // [3]:   Firmware Activation Notification (Not Used)
+            // [4:7]: Reserved
+
+            // Clear SET_EVENT_NOTIFICATION_CMD bit 1 and keep bit 0 set
+            uint8_t notifCmd = 0x01;
+            errl = deviceWrite( i_dimm, &notifCmd, NVDIMM_SIZE,
+                                DEVICE_NVDIMM_ADDRESS(notifCmdReg) );
+            if ( errl )
+            {
+                PRDF_ERR( PRDF_FUNC "Failed to clear Set Event Notification "
+                          "Cmd Reg. HUID: 0x%08x", getHuid(i_dimm) );
+                PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+                o_rc = FAIL;
+                break;
+            }
+
+            // Check SET_EVENT_NOTIFICATION_STATUS to ensure everything is set
+            // as we expect and we don't see any errors.
+            uint8_t notifStat = 0;
+            errl = deviceRead( i_dimm, &notifStat, NVDIMM_SIZE,
+                               DEVICE_NVDIMM_ADDRESS(notifStatusReg) );
+            if ( errl )
+            {
+                PRDF_ERR( PRDF_FUNC "Failed to read Set Event Notification "
+                          "Status Reg. HUID: 0x%08x", getHuid(i_dimm) );
+                PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+                o_rc = FAIL;
+                break;
+            }
+            std::map<uint8_t,bool> bitList = __nvdimmGetActiveBits( notifStat );
+
+            // if Bit [1]: SET_EVENT_NOTIFICATION_ERROR = 1
+            // or Bit [2]: PERSISTENCY_ENABLED = 0
+            // or Bit [3]: WARNING_THRESHOLD_ENABLED = 1
+            if ( bitList.count(1)  || !bitList.count(2) || bitList.count(3) )
+            {
+                o_statusErr = true;
+
+                // Make the log predictive and mask the fir
+                io_sc.service_data->SetThresholdMaskId(0);
+
+                // Callout the NVDIMM, no gard
+                io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+
+                // Send message to PHYP that save/restore may work
+                o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm,
+                    NVDIMM::NVDIMM_RISKY_HW_ERROR );
+                if ( SUCCESS != o_rc ) break;
+                // Either way, stop before the warning threshold is rewritten.
+                break;
+            }
+
+
+            // Set the warning threshold to error threshold + 1
+            warnTh = errTh+1;
+            errl = deviceWrite( i_dimm, &warnTh, NVDIMM_SIZE,
+                                DEVICE_NVDIMM_ADDRESS(i_warnThReg) );
+            if ( errl )
+            {
+                PRDF_ERR( PRDF_FUNC "Failed to write Warning Threshold Reg. "
+                          "HUID: 0x%08x", getHuid(i_dimm) );
+                PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+                o_rc = FAIL;
+                break;
+            }
+
+            // Set SET_EVENT_NOTIFICATION_CMD bit 1 and keep bit 0 set
+            notifCmd = 0x03;
+            errl = deviceWrite( i_dimm, &notifCmd, NVDIMM_SIZE,
+                                DEVICE_NVDIMM_ADDRESS(notifCmdReg) );
+            if ( errl )
+            {
+                PRDF_ERR( PRDF_FUNC "Failed to write Set Event Notification "
+                          "Cmd Reg. HUID: 0x%08x", getHuid(i_dimm) );
+                PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+                o_rc = FAIL;
+                break;
+            }
+
+            // Recheck SET_EVENT_NOTIFICATION_STATUS to ensure everything is set
+            // as we expect and we don't see any errors.
+            errl = deviceRead( i_dimm, &notifStat, NVDIMM_SIZE,
+                               DEVICE_NVDIMM_ADDRESS(notifStatusReg) );
+            if ( errl )
+            {
+                PRDF_ERR( PRDF_FUNC "Failed to read Set Event Notification "
+                          "Status Reg. HUID: 0x%08x", getHuid(i_dimm) );
+                PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+                o_rc = FAIL;
+                break;
+            }
+            bitList = __nvdimmGetActiveBits( notifStat );
+
+            // if Bit [1]: SET_EVENT_NOTIFICATION_ERROR = 1
+            // or Bit [2]: PERSISTENCY_ENABLED = 0
+            // or Bit [3]: WARNING_THRESHOLD_ENABLED = 0
+            if ( bitList.count(1)  || !bitList.count(2) || !bitList.count(3) )
+            {
+                o_statusErr = true;
+
+                // Make the log predictive and mask the fir
+                io_sc.service_data->SetThresholdMaskId(0);
+
+                // Callout the NVDIMM, no gard
+                io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+
+                // Send message to PHYP that save/restore may work
+                o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm,
+                    NVDIMM::NVDIMM_RISKY_HW_ERROR );
+                if ( SUCCESS != o_rc ) break;
+                // Either way, leave the do-while; no further steps remain.
+                break;
+            }
+        }
+        // Note: moving the threshold should clear the warning from
+        // WARNING_THRESHOLD_STATUS, which allows future warnings to report.
+
+    }while(0);
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+/**
+ * @brief  Analyze NVDIMM Warning Threshold Status Register (0xA7) bits 2, 0, 1
+ * @param  io_sc       The step code data struct (signatures, callouts).
+ * @param  i_dimm      The target dimm (signatures use its parent MCA).
+ * @param  io_errFound Whether an error was already found; set once a bit is handled.
+ * @return FAIL if the read fails, else the first non-SUCCESS helper rc or SUCCESS
+ */
+uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
+                                      TargetHandle_t i_dimm, bool & io_errFound)
+{
+    #define PRDF_FUNC "[__analyzeWarningThrStatusReg] "
+
+    uint32_t o_rc = SUCCESS;
+    uint8_t data = 0;
+
+    // Get the MCA connected to this DIMM; all signatures are added against it
+    TargetHandle_t mca = getConnectedParent( i_dimm, TYPE_MCA );
+
+    do
+    {
+        // The register is 1 byte; passed to deviceRead() as the buffer length
+        size_t NVDIMM_SIZE = 1;
+
+        // Read the Warning Threshold Status Register (0xA7) 7:0
+        errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE,
+            DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::WARNING_THRESHOLD_STATUS) );
+        if ( errl )
+        {
+            PRDF_ERR( PRDF_FUNC "Failed to read Warning Threshold Status Reg. "
+                      "HUID: 0x%08x", getHuid(i_dimm) );
+            PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+            o_rc = FAIL;
+            break;
+        }
+        std::map<uint8_t,bool> bitList = __nvdimmGetActiveBits( data );
+
+        // Analyze bit 2 before the lifetime warnings (bits 0 and 1) below
+        // BIT 2: ES_TEMP_WARNING
+        if ( bitList.count(2) )
+        {
+            // Sleep two seconds (args presumably seconds, ms) to avoid exiting
+            // PRD analysis faster than the ES_TEMP sample rate.
+            PlatServices::milliSleep( 2, 0 );
+
+            // Read ES_TEMP and the ES_TEMP_WARNING high and low threshold values
+            uint16_t msbEsTempReg = NVDIMM::i2cReg::ES_TEMP1;
+            uint16_t lsbEsTempReg = NVDIMM::i2cReg::ES_TEMP0;
+            uint16_t esTemp = 0;
+            o_rc = __readTemp( i_dimm, msbEsTempReg, lsbEsTempReg, esTemp );
+            if ( SUCCESS != o_rc ) break;
+
+            uint16_t msbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_HIGH_THRESHOLD1;
+            uint16_t lsbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_HIGH_THRESHOLD0;
+            uint16_t esTempHighTh = 0;
+            o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempHighTh );
+            if ( SUCCESS != o_rc ) break;
+
+            msbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_LOW_THRESHOLD1;
+            lsbThReg = NVDIMM::i2cReg::ES_TEMP_WARNING_LOW_THRESHOLD0;
+            uint16_t esTempLowTh = 0;
+            o_rc = __readTemp( i_dimm, msbThReg, lsbThReg, esTempLowTh );
+            if ( SUCCESS != o_rc ) break;
+
+            // Check to see if the ES_TEMP is negative (bit 12, mask 0x1000)
+            bool esTempNeg = false;
+            if ( esTemp & 0x1000 ) esTempNeg = true;
+
+            // High warning: ES_TEMP is non-negative and >= the high threshold.
+            // Just in case ES_TEMP has moved before we read it out, a 2°C
+            // margin (0x0020) is subtracted from the threshold first.
+            if ( (esTemp >= (esTempHighTh - 0x0020)) && !esTempNeg )
+            {
+                __addSignature( io_sc, mca, io_errFound,
+                                PRDFSIG_EsTmpWarnHigh );
+            }
+            // Else low warning: ES_TEMP is negative or <= the low threshold
+            // plus the same 2°C margin.
+            else if ( (esTemp <= (esTempLowTh + 0x0020)) || esTempNeg )
+            {
+                __addSignature( io_sc, mca, io_errFound,
+                                PRDFSIG_EsTmpWarnLow );
+            }
+            // Else the temperature must have gone back to a normal value, so
+            // we will label this as a false alarm case.
+            else
+            {
+                __addSignature( io_sc, mca, io_errFound,
+                                PRDFSIG_EsTmpWarnFa );
+            }
+
+            // Callout BPM (backup power module) high
+            o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
+            if ( SUCCESS != o_rc ) break;
+
+            // Callout NVDIMM low, no gard
+            io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+
+            // Because of the possibility of intermittent ES temperature
+            // false alarm readings, we will keep the log hidden. If there is
+            // an actual ES temperature problem, we assume we will continue
+            // to be called to handle the temperature warning and hit threshold.
+
+            // Only send the save/restore message to PHYP if we hit threshold.
+            if ( io_sc.service_data->IsAtThreshold() )
+            {
+                // Send message to PHYP that save/restore may work
+                o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm,
+                        NVDIMM::NVDIMM_RISKY_HW_ERROR );
+                if ( SUCCESS != o_rc ) break;
+            }
+
+            io_errFound = true;
+        }
+        // BIT 0: NVM_LIFETIME_WARNING
+        if ( bitList.count(0) )
+        {
+            // Adjust warning threshold (presumably sets firstWarn, statusErr).
+            uint16_t warnThReg = NVDIMM::i2cReg::NVM_LIFETIME_WARNING_THRESHOLD;
+            uint16_t errThReg  = NVDIMM::i2cReg::NVM_LIFETIME_ERROR_THRESHOLD;
+            bool firstWarn = false;
+            bool statusErr = false;
+            o_rc = __adjustThreshold( io_sc, i_dimm, warnThReg, errThReg,
+                                      firstWarn, statusErr );
+            if ( SUCCESS != o_rc ) break;
+
+            // Make the log predictive, but do not mask the FIR
+            io_sc.service_data->setServiceCall();
+
+            // If we got a set event notification status error, add the
+            // signature for that before adding the signature for the warning.
+            // Also do not take our normal callout action since we already will
+            // have called out the NVDIMM because of the status error.
+            if ( statusErr )
+            {
+                __addSignature( io_sc, mca, io_errFound, PRDFSIG_NotifStatErr );
+
+                // Need to set io_errFound here so the warning signature is
+                // added to the multi-signature list instead of as the primary
+                // signature.
+                io_errFound = true;
+            }
+            else
+            {
+                // Callout NVDIMM medium priority, no gard
+                io_sc.service_data->SetCallout( i_dimm, MRU_MED, NO_GARD );
+            }
+
+            // Update signature depending on whether this is the first or second
+            // warning of this type.
+            if ( firstWarn )
+            {
+                __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmLifeWarn1 );
+            }
+            else
+            {
+                __addSignature( io_sc, mca, io_errFound, PRDFSIG_NvmLifeWarn2 );
+            }
+
+
+            io_errFound = true;
+        }
+        // BIT 1: ES_LIFETIME_WARNING
+        if ( bitList.count(1) )
+        {
+            // Adjust warning threshold (presumably sets firstWarn, statusErr).
+            uint16_t warnThReg = NVDIMM::i2cReg::ES_LIFETIME_WARNING_THRESHOLD;
+            uint16_t errThReg  = NVDIMM::i2cReg::ES_LIFETIME_ERROR_THRESHOLD;
+            bool firstWarn = false;
+            bool statusErr = false;
+            o_rc = __adjustThreshold( io_sc, i_dimm, warnThReg, errThReg,
+                                      firstWarn, statusErr );
+            if ( SUCCESS != o_rc ) break;
+
+            // Make the log predictive, but do not mask the FIR
+            io_sc.service_data->setServiceCall();
+
+            // If we got a set event notification status error, add the
+            // signature for that before adding the signature for the warning.
+            // Also do not take our normal callout action since we already will
+            // have called out the NVDIMM because of the status error.
+            if ( statusErr )
+            {
+                __addSignature( io_sc, mca, io_errFound, PRDFSIG_NotifStatErr );
+
+                // Need to set io_errFound here so the warning signature is
+                // added to the multi-signature list instead of as the primary
+                // signature.
+                io_errFound = true;
+            }
+            else
+            {
+                // Callout BPM (backup power module) high
+                o_rc = __addBpmCallout( i_dimm, HWAS::SRCI_PRIORITY_HIGH );
+                if ( SUCCESS != o_rc ) break;
+
+                // Callout NVDIMM low, no gard
+                io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+            }
+
+            // Update signature depending on whether this is the first or second
+            // warning of this type.
+            if ( firstWarn )
+            {
+                __addSignature(io_sc, mca, io_errFound, PRDFSIG_EsLifeWarn1);
+            }
+            else
+            {
+                __addSignature(io_sc, mca, io_errFound, PRDFSIG_EsLifeWarn2);
+            }
+
+            io_errFound = true;
+        }
+
+    }while(0);
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+/**
  * @brief  De-assert the EVENT_N pin by setting bit 2 in NVDIMM_MGT_CMD1 (0x41)
  * @param  i_dimm The target dimm.
  * @return FAIL if unable to read/write register, else SUCCESS
@@ -698,7 +1292,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm )
 
         // Read the NVDIMM_MGT_CMD1 register (0x41) 7:0
         errlHndl_t errl = deviceRead( i_dimm, &data, NVDIMM_SIZE,
-            DEVICE_NVDIMM_ADDRESS(NVDIMM_MGT_CMD1) );
+            DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::NVDIMM_MGT_CMD1) );
         if ( errl )
         {
             PRDF_ERR( PRDF_FUNC "Failed to read NVDIMM_MGT_CMD1. "
@@ -713,7 +1307,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm )
 
         // Write the updated data back to NVDIMM_MGT_CMD1
         errl = deviceWrite( i_dimm, &data, NVDIMM_SIZE,
-            DEVICE_NVDIMM_ADDRESS(NVDIMM_MGT_CMD1) );
+            DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::NVDIMM_MGT_CMD1) );
         if ( errl )
         {
             PRDF_ERR( PRDF_FUNC "Failed to write NVDIMM_MGT_CMD1. "
@@ -732,6 +1326,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm )
 }
 
 #endif // HOSTBOOT_RUNTIME
+#endif // CONFIG_NVDIMM
 
 /**
  * @brief  MCACALFIR[8] - Error from NVDIMM health status registers
@@ -744,13 +1339,28 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
 {
     #define PRDF_FUNC "[nimbus_mca::AnalyzeNvdimmHealthStatRegs] "
 
+    #ifdef CONFIG_NVDIMM
     #ifdef __HOSTBOOT_RUNTIME
 
     uint32_t l_rc = SUCCESS;
+    bool errFound = false;
 
     // We need to check both dimms for errors
     for ( auto & dimm : getConnected(i_chip->getTrgt(), TYPE_DIMM) )
     {
+        // Skip any non-NVDIMMs
+        if ( !isNVDIMM(dimm) ) continue;
+
+        // Add SMART-specific, page 4 registers to FFDC
+        errlHndl_t mainErrl = nullptr;
+        mainErrl = ServiceGeneratorClass::ThisServiceGenerator().getErrl();
+        if ( nullptr == mainErrl )
+        {
+            PRDF_ERR( PRDF_FUNC "Failed to get the global error log." );
+            continue;
+        }
+        PlatServices::nvdimmAddFfdc( dimm, mainErrl );
+
         // De-assert the EVENT_N pin by setting bit 2 in NVDIMM_MGT_CMD1
         l_rc = __deassertEventN( dimm );
         if ( SUCCESS != l_rc ) continue;
@@ -762,7 +1372,7 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
 
         // Read the Module Health Register (0xA0) 7:0
         errlHndl_t errl = deviceRead( dimm, &data, NVDIMM_SIZE,
-            DEVICE_NVDIMM_ADDRESS(MODULE_HEALTH) );
+            DEVICE_NVDIMM_ADDRESS(NVDIMM::i2cReg::MODULE_HEALTH) );
         if ( errl )
         {
             PRDF_ERR( PRDF_FUNC "Failed to read Module Health Register. "
@@ -775,6 +1385,30 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
         // BIT 0: Persistency Lost
         if ( bitList.count(0) )
         {
+            // Analyze Health Status0 Reg, Health Status1 Reg,
+            // and Error Threshold Status Reg
+            l_rc = __analyzeHealthStatus0Reg( io_sc, dimm, errFound );
+            if ( SUCCESS != l_rc ) continue;
+            l_rc = __analyzeHealthStatus1Reg( io_sc, dimm, errFound );
+            if ( SUCCESS != l_rc ) continue;
+            bool esTempErr = false;
+            l_rc = __analyzeErrorThrStatusReg(io_sc, dimm, errFound, esTempErr);
+            if ( SUCCESS != l_rc ) continue;
+
+            // If we hit an ES temperature error and have not yet hit threshold,
+            // then keep the log hidden.
+            if ( esTempErr && !io_sc.service_data->IsAtThreshold() ) continue;
+
+            // If we didn't find any error, then keep the log hidden.
+            if ( !errFound )
+            {
+                io_sc.service_data->setSignature( i_chip->getHuid(),
+                    PRDFSIG_FirEvntGone );
+                // Callout NVDIMM
+                io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
+                continue;
+            }
+
             // EVENT_N cannot be retriggered on a new PERSISTENCY_LOST_ERROR
             // if a previous PERSISTENCY_LOST_ERROR still exists. Meaning, we
             // cannot detect/report multiple errors that happen at different
@@ -782,43 +1416,77 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
             // and make the log predictive.
             io_sc.service_data->SetThresholdMaskId(0);
 
-            // Send persistency lost message to PHYP
-            l_rc = PlatServices::nvdimmNotifyPhypProtChange( dimm,
-                    NVDIMM::UNPROTECTED_BECAUSE_ERROR );
+            // Send message to PHYP that save/restore may work
+            l_rc = PlatServices::nvdimmNotifyProtChange( dimm,
+                NVDIMM::NVDIMM_RISKY_HW_ERROR );
             if ( SUCCESS != l_rc ) continue;
 
-            // Analyze Health Status0 Reg, Health Status1 Reg,
-            // and Error Theshold Status Reg
-            l_rc = __analyzeHealthStatus0Reg( io_sc, dimm );
-            if ( SUCCESS != l_rc ) continue;
-            l_rc = __analyzeHealthStatus1Reg( io_sc, dimm );
-            if ( SUCCESS != l_rc ) continue;
-            l_rc = __analyzeErrorThrStatusReg( io_sc, dimm );
+        }
+        // BIT 1: Warning Threshold Exceeded
+        else if ( bitList.count(1) )
+        {
+            l_rc = __analyzeWarningThrStatusReg( io_sc, dimm, errFound );
             if ( SUCCESS != l_rc ) continue;
+
+            if ( !errFound )
+            {
+                io_sc.service_data->setSignature( i_chip->getHuid(),
+                    PRDFSIG_FirEvntGone );
+                // Callout NVDIMM
+                io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
+                continue;
+            }
         }
-        // BIT 1: Warning Threshold Exceeded -- ignore
         // BIT 2: Persistency Restored
-        if ( bitList.count(2) )
+        else if ( bitList.count(2) )
         {
             // It would be rare to have an intermittent error that comes and
             // goes so fast we only see PERSISTENCY_RESTORED and not
             // PERSISTENCY_LOST_ERROR. Set predictive on threshold of 32
             // per day (rule code handles the thresholding), else just keep
             // as a hidden log.
-            io_sc.service_data->AddSignatureList( dimm, PRDFSIG_NvdimmPersRes );
+            __addSignature( io_sc, i_chip->getTrgt(), errFound,
+                            PRDFSIG_NvdimmPersRes );
+
+            // Callout NVDIMM
+            io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
+        }
+        // BIT 3: Below Warning Threshold
+        else if ( bitList.count(3) )
+        {
+            // Much like the persistency restored bit above, we don't expect
+            // to see this, so just make a hidden log.
+            __addSignature( io_sc, i_chip->getTrgt(), errFound,
+                            PRDFSIG_BelowWarnTh );
+
+            // Callout NVDIMM
+            io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
+        }
+        // BIT 4: Hardware Failure -- ignore - no logic feeding this
+        // BIT 5: EVENT_N_LOW -- ignore
+        // BIT 6:7: Unused
+
+        // If we reach a threshold on MCACALFIR[8] of 32 per day, we assume
+        // some intermittent error must be triggering the FIR that isn't a
+        // persistency lost error which would cause us to mask. The rule code
+        // handles the actual thresholding here.
+        if ( io_sc.service_data->IsAtThreshold() && !errFound )
+        {
+            io_sc.service_data->setSignature( i_chip->getHuid(),
+                                              PRDFSIG_IntNvdimmErr );
 
             // callout NVDIMM high, cable high, BPM high, no gard
             io_sc.service_data->SetCallout( dimm, MRU_HIGH, NO_GARD );
             l_rc = __addBpmCallout( dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != l_rc ) continue;
-            l_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH );
+            l_rc = __addNvdimmCableCallout( dimm, HWAS::SRCI_PRIORITY_HIGH );
             if ( SUCCESS != l_rc ) continue;
-        }
-        // BIT 3: Below Warning Threshold -- ignore
-        // BIT 4: Hardware Failure -- ignore
-        // BIT 5: EVENT_N_LOW -- ignore
-        // BIT 6:7: Unused
 
+            // Send message to PHYP that save/restore may work
+            l_rc = PlatServices::nvdimmNotifyProtChange( dimm,
+                    NVDIMM::NVDIMM_RISKY_HW_ERROR );
+            if ( SUCCESS != l_rc ) continue;
+        }
     }
     #else // IPL only
 
@@ -826,7 +1494,14 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
     PRDF_ERR( PRDF_FUNC "Unexpected call to analyze NVDIMMs at IPL." );
     io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_HIGH, NO_GARD );
 
-    #endif
+    #endif // end runtime vs IPL check
+
+    #else // CONFIG_NVDIMM not defined
+
+    PRDF_ERR( PRDF_FUNC "CONFIG_NVDIMM not defined." );
+    io_sc.service_data->SetCallout( LEVEL2_SUPPORT, MRU_HIGH, NO_GARD );
+
+    #endif // end CONFIG_NVDIMM check
 
     return SUCCESS; // nothing to return to rule code