diff options
Diffstat (limited to 'src/usr/isteps/nvdimm')
-rw-r--r-- | src/usr/isteps/nvdimm/ReadMe.md | 278 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/bpm_update.C | 4108 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/bpm_update.H | 1078 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/errlud_nvdimm.C | 48 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/errlud_nvdimm.H | 33 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm.C | 5026 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm.H | 344 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm.mk | 5 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimmErrorLog.C | 1317 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimmErrorLog.H | 108 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm_update.C | 687 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm_update.H | 54 | ||||
-rwxr-xr-x | src/usr/isteps/nvdimm/nvdimmdd.C | 532 | ||||
-rwxr-xr-x | src/usr/isteps/nvdimm/nvdimmdd.H | 124 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H | 79 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H | 10 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/runtime/nvdimm_rt.C | 1222 |
17 files changed, 14152 insertions, 901 deletions
diff --git a/src/usr/isteps/nvdimm/ReadMe.md b/src/usr/isteps/nvdimm/ReadMe.md new file mode 100644 index 000000000..1f98438b2 --- /dev/null +++ b/src/usr/isteps/nvdimm/ReadMe.md @@ -0,0 +1,278 @@ +# Battery Power Module (BPM) Updates Overview
+To support different firmware versions released by SMART, the bpm_update.C and
+bpm_update.H files were created to facilitate upgrades and downgrades of the
+firmware version on a BPM attached to an NVDIMM. There are two kinds of BPM, one
+that supports 16GB type NVDIMMs and one that supports 32GB type NVDIMMs.
+Although they have separate image files, the update is functionally the same for
+each. This overview will not go into fine-grain detail on every process of the
+update. For more information see the comments in bpm_update.H, bpm_update.C and
+in the various supporting files.
+
+Supporting Files:
+* Two image files, e.g., SRCA8062IBMH012B_FULL_FW_Rev1.03_02282019.txt or
+SRCA8062IBMH011B_FULL_FW_Rev1.04_05172019.txt
+ * The image file names are important in that they contain information that
+ is not found anywhere else in the files. For example, after SRCA8062IBMH01
+ but right before the B is a number. That signifies which kind of BPM type
+ that image is for. A 1 means 32GB type, a 2 means 16GB type. Also, note that
+ the version Rev1.0x is in the file name. There is no other place where this
+ occurs within the image file. So, to differentiate the updates from each
+ other the file names must be left intact.
+* src/build/buildpnor/buildBpmFlashImages.pl
+ * This perl script is responsible for packaging the image files listed above
+ into binaries that can then be associated with LIDs for use during the BPM
+ update.
+* src/build/buildpnor/bpm-utils/imageCrc.c and
+src/build/buildpnor/bpm-utils/insertBpmFwCrc.py
+ * These are provided by SMART and utilized by buildBpmFlashImages.pl to
+ generate the correct CRC for the firmware image during the fsp build.
+* src/build/mkrules/dist.targets.mk
+ * This file puts src/build/buildpnor/buildBpmFlashImages.pl,
+ src/build/buildpnor/bpm-utils/imageCrc.c,
+ and src/build/buildpnor/bpm-utils/insertBpmFwCrc.py into the fsp.tar which
+ can then be primed over to an FSP sandbox.
+* <fsp_sandbox>/src/engd/nvdimm/makefile
+ * This makefile compiles the src/build/buildpnor/bpm-utils/imageCrc.c and
+ calls src/build/buildpnor/buildBpmFlashImages.pl to do all the necessary
+ work to bring the flash image binaries up-to-date.
+* In <fsp_sandbox>/obj/ppc/engd/nvdimm/bpm/ are 16GB-NVDIMM-BPM-CONFIG.bin,
+16GB-NVDIMM-BPM-FW.bin, 32GB-NVDIMM-BPM-CONFIG.bin, and 32GB-NVDIMM-BPM-FW.bin
+ * These are the output binaries which will be associated to LIDs for
+ hostboot use.
+
+### BPM Update Flow Overview
+The update procedure for the BPM is fairly rigid. There are many steps that must
+occur in a precise order otherwise the update will fail. We aren't able to
+communicate directly to the BPM for these updates. Instead, we send commands to
+the NVDIMM which in-turn passes those along to the BPM. There are a couple
+"modes" that must be enabled to begin the update process and be able to
+communicate with the BPM. These are:
+
+##### Update Mode
+This is a mode for the NVDIMM. To enter this mode a command is sent to the
+NVDIMM so that the NVDIMM can do some house-keeping work to prepare for the BPM
+update. Since the NVDIMM is always doing background scans of the BPM, this mode
+will quiet those scans so that we are able to communicate with the BPM.
+Otherwise, the communication would be too chaotic to perform the update.
+
+##### Boot Strap Loader (BSL) Mode (Currently, only BSL 1.4 is supported)
+This is the mode that the BPM enters in order to perform the update. In order to
+execute many of the commands necessary to perform the update, the BPM **must**
+be in BSL mode. There are varying versions of BSL mode and these versions are
+not coupled with the firmware version at all. In order for the BSL version to be
+updated on a BPM, the device must be shipped back to SMART because it requires a
+specific hardware programmer device to be updated.
+
+The update procedure does vary between BSL versions, so to ensure a successful
+update the code will first read the BSL version on the BPM. If the BSL version
+is not 1.4 (the supported version) then the update process will not occur as it
+is known that BSL versions prior to 1.4 are different enough that the update
+would fail if attempted and it is unknown if future BSL versions will be
+backward compatible with the BSL 1.4 procedure.
+
+If something happens to the firmware during an update such that the firmware on
+the device is missing or invalid, the BPM is designed to always fall back to
+this mode so that valid firmware can be loaded onto the BPM and the device can
+be recovered. However, if the firmware is corrupted by any means outside of an
+update then it is highly likely that the BPM will not be recoverable and it may
+need to be sent back to SMART for recovery.
+
+#### An update in two parts
+The BPM update cannot be done in one single pass. This is because there are two
+sections of data on the BPM that must be modified to successfully update the
+BPM. These are referred to as the Firmware portion of the update and the
+Configuration Data Segment portion of the update.
+
+##### The Firmware Portion
+This is the actual firmware update. Although, when someone says the BPM Firmware
+Update they are often implicitly referring to both parts of the update. In order
+for the full update to be a success, the firmware portion of the update is
+reliant upon another part to have access to all of the features in a given
+update. That is the Configuration Data Segment. It is safe, and advisable, to
+update the firmware part first and then the configuration part second.
+
+##### The Configuration Data Portion
+The Configuration Data Segment portion is commonly referred to as the segment
+update, config update, or any other variation of the name. The config segment
+portion **requires** working firmware on the BPM to succeed. This is because we
+must read out some of the segment data on the BPM and merge it with parts from
+the image. Without working firmware, it will not work and the update will
+_never_ succeed.
+
+The configuration data on the BPM is broken into four segments, A, B, C, and D.
+These are in reverse order in memory such that D has the lowest address offset.
+For our purposes, we only care about Segment D and B. A and C contain logging
+information and are not necessary to touch. Segment D will be completely
+replaced by the data in the image file. Segment B is the critical segment,
+however, because we must splice data from the image into it. Segment B contains
+statistical information and other valuable information that should never be lost
+during an update. If this segment becomes corrupted then it is very likely the
+BPM will be stuck in a bad state.
+
+##### Bpm::runUpdate Flow
+1. Read the current firmware version on the BPM to determine if updates are
+necessary. If this cannot be done, that is to say that an error occurs during
+this process, then updates will not be attempted due to a probable
+communication issue with the BPM.
+2. Read the current BSL mode version to determine if the BSL version on the BPM
+is compatible with the versions we support. If this cannot be done due to some
+kind of error, then the updates will not be attempted since we cannot be sure
+that the BPM has a compatible BSL version.
+3. Perform the firmware portion of the update. If an error occurs during this
+part of the update then the segment portion of the updates will not be
+attempted as per the given requirement above.
+4. Perform the segment portion of the update.
+
+##### Common Operating Processes between functions
+Reading the BSL version, and performing the firmware and segment updates all
+follow a common operating process to do their work successfully. The steps laid
+out in those functions must be followed in the given order otherwise the
+functions will not execute successfully and the BPM may go into a bad state.
+These steps are:
+1. Enter Update Mode
+2. Verify the NVDIMM is in Update Mode
+3. Command the BPM to enter BSL mode
+4. Unlock the BPM so that writing can be performed.
+5. Do function's work.
+6. Reset the BPM, which is the way that BSL mode is exited.
+7. Exit Update Mode
+
+By following these steps, the BPM is able to do some background work to verify
+its state. If firmware and config updates are attempted at the same time this
+will introduce unpredictable behavior. That is, if steps 1-4 are executed only
+once, the firmware and config updates are both performed as steps 5a and 5b,
+and then steps 6-7 are done, the result will be unpredictable behavior. It is
+best to run through the whole process for each update. Reading the BSL version
+does not have this limitation. As long as steps 1-4 have been executed, the BSL
+version can be read at any time.
+
+-------------------------------------------------------------------------------
+# Node Controller (NC) Update Overview
+To support different firmware versions released by SMART, the nvdimm_update.C
+and nvdimm_update.H files were created to facilitate upgrades and downgrades of
+the firmware version of node controllers for NVDIMM. There are two kinds of
+NVDIMM node controllers: one that supports 16GB type NVDIMMs and one that
+supports 32GB type NVDIMMs. Although they have separate image files, the update
+is functionally the same for each. This overview will not go into fine-grain
+detail on every process of the update. For more information see the comments in
+nvdimm_update.H, nvdimm_update.C and in the various supporting files.
+
+Supporting Files:
+* Two signed image files are provided by SMART.
+ The name contains the NC type (16GB or 32GB) + the version (v##)
+
+ Example:
+ nvc4_fpga_31mm_X4_16GB_A7_2TLC_GA6_IBM_JEDEC_2019_03_22_v30_r29325-SIGNED.bin
+ nvc4_fpga_31mm_X4_32GB_A7_2TLC_GA6_IBM_JEDEC_2019_03_22_v30_r29325-SIGNED.bin
+
+* Files checked into cmvc/build process
+ Note: Each file contains two bytes that describe the NC type and version, so
+ we can use a generic name in CMVC
+ * NVDIMM_SRN7A2G4IBM26MP1SC.bin (16GB one)
+ * NVDIMM_SRN7A4G4IBM24KP2SB.bin (32GB one)
+
+* Build process creates lid files that are loaded on system
+ * 80d00025.lid (secure content LID)
+ * 81e00640.lid (signed 16GB)
+ * 81e00641.lid (signed 32GB)
+
+### NC Update Flow Overview
+The update procedure for the NC is fairly rigid. There are many steps that must
+occur in a precise order otherwise the update will fail.
+
+### Design points
+Three classes are used for the NC update
+* NvdimmsUpdate -- container/driver class
+ This is where all the functional NVDIMM NCs are checked and updated if necessary
+* NvdimmLidImage -- accessors for a given NC LID image (16 or 32)
+ This provides the LID content for easy checking and use during update
+* NvdimmInstalledImage -- accessor to current installed NC image
+ This is the main workhorse. It uses i2c communication to check what is
+ installed and performs the update to a new LID image level
+
+##### NvdimmsUpdate::runUpdate Flow
+1. Build up installed NVDIMM image lists (determine what NC types are installed)
+2. Using secure content lid, now call runUpdateUsingLid() for each LID type
+with the appropriate target NVDIMMs associated with that type.
+3. runUpdateUsingLid() cycles through each NVDIMM target and checks if the
+current NC level is different than the lid version level.
+Only update if the levels do not match to allow upgrading and downgrading.
+4. NvdimmInstalledImage::updateImage() is called on each NVDIMM node controller
+that requires an update
+5. updateImage runs through the steps outlined in 9.7 Firmware Update workflow
+in the JEDEC document JESD245B
+6. Basic steps of the update done one NVDIMM controller at a time
+ 1. Validate module manufacturer ID and module product identifier (done before this)
+ 2. Verify 'Operation In Progress' bit in the NVDIMM_CMD_STATUS0
+ register is cleared (i.e., NV controller is NOT busy)
+ 3. Make sure we start from a cleared state
+ 4. Enable firmware update mode
+ 5. Clear the Firmware Operation status
+ 6. Clear the firmware data block to ensure there is no residual data
+ 7. Send the first part (header + SMART signature) of the Firmware Image Data
+ Include sending data and checking checksum after data is sent
+ 8. Command the module to validate that the firmware image is valid for
+ the module based on the header
+ 9. Commit the first firmware data region
+ 10. Send and commit the remaining firmware data in REGION_BLOCK_SIZE regions
+ - each block is 32 bytes
+ - each region contains up to REGION_BLOCK_SIZE blocks (currently 1,024)
+ - each region is verified by checksum before next region is sent
+ 11. Command the module to validate the firmware data
+ 12. Disable firmware update mode
+ 13. Switch from slot0 to slot1 which contains the new image code
+ 14. Validate running new code level
+
+# NVDIMM Secure Erase Verify Flow
+DS8K lpar -> HBRT NVDIMM operation = factory_default + secure_erase_verify_start
+ HBRT executes factory_default and steps 1) and 2)
+DS8K lpar -> HBRT NVDIMM operation = secure_erase_verify_complete
+ HBRT executes step 3)
+ If secure erase verify has not completed, return status with verify_complete bit = 0
+ DS8K lpar is responsible for monitoring elapsed time (2/4 hours) and restarting the process (step 6)
+ If secure erase verify has completed
+ HBRT executes steps 4) and 5), generating error logs for any non-zero register values
+ Return status with verify_complete bit = 1
+
+## Procedure Flow for NVDIMM Secure Erase Verify
+ *Note: Secure Erase Verify should only be run after a Factory Default operation.
+ Secure Erase Verify is intended to verify whether all NAND blocks have been erased.
+ *Note: Full breakout of all Page 5 Secure Erase Verify registers can be found in
+ SMART document "JEDEC NVDIMM Vendor Page 2 Extensions".
+ 1) Set Page 5 Register 0x1B to value "0x00"
+ // this clears the status register
+ 2) Set Page 5 Register 0x1A to value "0xC0"
+ // this kicks off the erase verify operation
+ 3) Wait for Page 5 Register 0x1A Bit 7 to be reset to value "0"
+ // i.e., the overall register value should be "0x40";
+ this means that erase verify has completed
+ a. If Page 5 Register 0x1A Bit 7 has not reset to value "0"
+ after 2 hours (16GB NVDIMM) or after 4 hours (32GB NVDIMM),
+ report a timeout error and skip to step (6)
+ 4) Read Page 5 Register 0x1B; value should be "0x00"
+ // this is the erase verify status register
+ a. If Page 5 Register 0x1B value is not "0x00",
+ report any/all errors as outlined in the table at the end of this document,
+ then skip to step (6)
+ 5) Read Page 5 Registers 0x1D (MSB) and 0x1C (LSB);
+ combined, the two registers should have a value of "0x0000"
+ // this is the number of chunks failing Secure Erase Verify
+ a. If the combined value of the two registers is not "0x0000",
+ report a threshold exceeded error along with the combined value of the two registers,
+ then skip to step (6)
+ 6) If any errors have been reported in steps (3), (4), or (5),
+ retry the secure erase verify operation starting again from step (1)
+ a. If the secure erase verify operation fails even after retrying,
+ report that secure erase verify operation has failed
+ 7) If no errors have been reported, report that secure erase verify operation
+ has been completed successfully
+ *Addendum: Breakout of Page 5 Register 0x1B Erase Verify Status bit values referenced in step (4) above.
+ All these bits should return as "0". Any bits returning as "1" should be reported with the error name below.
+ Bits 7:6 - Reserved
+ Bit 5 - BAD BLOCK
+ Bit 4 - OTHER
+ Bit 3 - ENCRYPTION LOCKED
+ Bit 2 - INVALID PARAMETER
+ Bit 1 - INTERRUPTED
+ Bit 0 - NAND ERROR
+
diff --git a/src/usr/isteps/nvdimm/bpm_update.C b/src/usr/isteps/nvdimm/bpm_update.C new file mode 100644 index 000000000..3ffdb595b --- /dev/null +++ b/src/usr/isteps/nvdimm/bpm_update.C @@ -0,0 +1,4108 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/isteps/nvdimm/bpm_update.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#include "nvdimm.H" +#include "bpm_update.H" +#include "nvdimm_update.H" + +#include <isteps/nvdimm/nvdimm.H> +#include <errl/hberrltypes.H> +#include <errl/errlmanager.H> +#include <endian.h> +#include <sys/time.h> +#include <hbotcompid.H> +#include <trace/interface.H> +#include <initservice/istepdispatcherif.H> +#include <isteps/nvdimm/bpmreasoncodes.H> + +#include <hwas/common/hwasCallout.H> + +#include <targeting/common/targetservice.H> +#include <attributeenums.H> + +namespace NVDIMM +{ +namespace BPM +{ + +trace_desc_t* g_trac_bpm = nullptr; +TRAC_INIT(&g_trac_bpm, BPM_COMP_NAME, 4*KILOBYTE); + +// For debug traces +#define TRACUCOMP(args...) +//#define TRACUCOMP(args...) TRACFCOMP(args) +#define TRACUBIN(args...) +//#define TRACUBIN(args...) 
TRACUBIN(args) + +// These constants are kept out of the header file since they aren't relevant +// outside of this file. +const uint16_t BPM_ADDRESS_ZERO = 0; +const uint16_t BPM_CONFIG_START_ADDRESS = 0x1800; +// There are two potential start addresses for the firmware section. +// They are: +const uint16_t MAIN_PROGRAM_ADDRESS = 0x8000; +const uint16_t MAIN_PROGRAM_ADDRESS_ALT = 0xA000; + +// In order to disable write protection on the BPM to perform updates a sequence +// of characters must be written. The hex represenation of those characters are +// defined by this constant. The sequence is SMOD +const uint8_t BPM_PASSWORD[] = {0x53, 0x4D, 0x4F, 0x44}; +const size_t BPM_PASSWORD_LENGTH = 4; + +// These are the segment codes used to dump out a particular config data segment +// on the BPM. +const uint16_t DEFAULT_REG_PAGE = 0x905E; +const uint16_t SEGMENT_A_CODE = 0x9A5E; +const uint16_t SEGMENT_B_CODE = 0x9B5E; +const uint16_t SEGMENT_C_CODE = 0x9C5E; +const uint16_t SEGMENT_D_CODE = 0x9D5E; + +// Starting addresses relative to address 0x1800. +// Segments appear in reverse order on BPM. +// Each segment is SEGMENT_SIZE long. +const size_t SEGMENT_D_START_ADDR = 0x000; +const size_t SEGMENT_C_START_ADDR = 0x080; +const size_t SEGMENT_B_START_ADDR = 0x100; +const size_t SEGMENT_A_START_ADDR = 0x180; + +const std::map<uint16_t, size_t> segmentMap +{ + {SEGMENT_A_CODE, SEGMENT_A_START_ADDR}, + {SEGMENT_B_CODE, SEGMENT_B_START_ADDR}, + {SEGMENT_C_CODE, SEGMENT_C_START_ADDR}, + {SEGMENT_D_CODE, SEGMENT_D_START_ADDR}, +}; + +const uint8_t MAX_RETRY = 3; + +/** + * @brief A helper function used in assert statements to verify the correct + * BSP commands were passed into the correct function arguments. + * + * @param[in] i_command The command that will verified to be a BSP command. + * + * @return bool true if i_command is a BSP command. + * false if it's not a BSP command. 
+ */ +bool isBspCommand(const uint8_t i_command) +{ + bool result = ((i_command == BPM_PASSTHROUGH) || (i_command == BPM_LOCAL)) + ? true : false; + + return result; +} + +/** + * @brief A helper function used in assert statements to verify the correct + * BCL commands were passed into the correct function arguments. + * + * @param[in] i_command The command that will verified to be a BCL command. + * + * @return bool true if i_command is a BCL command. + * false if it's not a BCL command. + */ +bool isBclCommand(const uint8_t i_command) +{ + bool result = false; + switch(i_command) + { + case BCL_ENTER_BSL_MODE: + case BCL_IS_BSL_MODE: + case BCL_WRITE_REG: + case BCL_START_UPDATE: + case BCL_END_UPDATE: + case BCL_IS_UPDATE_IN_PROGRESS: + { + result = true; + break; + } + default: + { + result = false; + break; + } + } + + return result; +} + +/** + * @brief A helper function used in assert statements to verify the correct + * BSL commands were passed into the correct function arguments. + * + * @param[in] i_command The command that will verified to be a BSL command. + * + * @return bool true if i_command is a BSL command. + * false if it's not a BSL command. + */ +bool isBslCommand(const uint8_t i_command) +{ + bool result = false; + switch(i_command) + { + case BSL_RX_DATA_BLOCK: + case BSL_RX_PASSWORD: + case BSL_ERASE_SEGMENT: + case BSL_TOGGLE_INFO: + case BSL_ERASE_BLOCK: + case BSL_MASS_ERASE: + case BSL_CRC_CHECK: + case BSL_LOAD_PC: + case BSL_TX_DATA_BLOCK: + case BSL_TX_BSL_VERSION: + case BSL_TX_BUFFER_SIZE: + case BSL_RX_DATA_BLOCK_FAST: + case BSL_RESET_DEVICE: + case BSL_VERIFY_BLOCK: + { + result = true; + break; + } + default: + { + result = false; + break; + } + } + + return result; +} + +/** + * @brief Helper function to pull out the BPM address offset in the given + * payload. + * + * @param[in] i_payload The payload from which to extract the address + * offset. 
+ */ +uint16_t getPayloadAddressBE(payload_t i_payload) +{ + // Get the payload address and convert back to big endian. + uint16_t payloadAddress = (i_payload[PAYLOAD_ADDRESS_START_INDEX]) + | (i_payload[PAYLOAD_ADDRESS_START_INDEX + 1] << 8); + return payloadAddress; +} + +/** + * @brief Helper function to extract the Segement ID from the segment code. + * + * @param[in] i_segmentCode The Segment code to pull the segment ID from + * + * @return uint8_t The Segment ID (A, B, C, D) as a hex value. + * For example 0xA, 0xB, etc. + */ +uint8_t getSegmentIdentifier(uint16_t i_segmentCode) +{ + uint8_t segmentId = (i_segmentCode >> 8) & 0xF; + return segmentId; +} + +/** + * @brief Helper function to sleep for longer durations in 5 second increments. + * + * @param[in] i_sleepInSeconds How many seconds to sleep. + */ +void longSleep(uint8_t const i_sleepInSeconds) +{ + int iterations = i_sleepInSeconds / 5; + do + { + // Send progress code. + INITSERVICE::sendProgressCode(); + + // Sleep for 5 seconds + nanosleep(5, 0); + + --iterations; + } while (iterations > 0); +} + +void runBpmUpdates(bpmList_t * const i_16gb_BPMs, + bpmList_t * const i_32gb_BPMs, + BpmFirmwareLidImage * const i_16gb_fwImage, + BpmFirmwareLidImage * const i_32gb_fwImage, + BpmConfigLidImage * const i_16gb_configImage, + BpmConfigLidImage * const i_32gb_configImage) +{ + + assert( (i_16gb_BPMs == nullptr) + || i_16gb_BPMs->empty() + || ((i_16gb_fwImage != nullptr) && (i_16gb_configImage != nullptr)), + "BPM::runBpmUpdates(): Update images for 16gb BPMs was nullptr and " + "there are 16gb BPMs in the system to may require updates."); + assert( (i_32gb_BPMs == nullptr) + || i_32gb_BPMs->empty() + || ((i_32gb_fwImage != nullptr) && (i_32gb_configImage != nullptr)), + "BPM::runBpmUpdates(): Update images for 32gb BPMs was nullptr and " + "there are 32gb BPMs in the system to may require updates."); + + errlHndl_t errl = nullptr; + + do { + + if ( (i_16gb_BPMs != nullptr) + && (i_16gb_fwImage != nullptr) 
+ && (i_16gb_configImage != nullptr)) + { + TRACFCOMP(g_trac_bpm, + "Check/update %d BPMs on 16GB_TYPE NVDIMMs", + i_16gb_BPMs->size()); + + for(auto& bpm : *i_16gb_BPMs) + { + errl = bpm.runUpdate(*i_16gb_fwImage, *i_16gb_configImage); + if (errl != nullptr) + { + uint32_t nvdimmHuid = TARGETING::get_huid(bpm.getNvdimm()); + if (bpm.attemptAnotherUpdate()) + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "An error occurred during a 16GB_TYPE BPM " + "update for NVDIMM 0x%.8X. " + "Try again.", + nvdimmHuid); + + delete errl; + errl = bpm.runUpdate(*i_16gb_fwImage, + *i_16gb_configImage); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "Another error occurred while attempting " + "to update the same 16GB_TYPE BPM for " + "NVDIMM 0x%.8X. Commit and move onto the " + "next BPM", + nvdimmHuid); + } + else + { + continue; + } + } + else + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "An error occurred during a 16GB_TYPE BPM " + "update for NVDIMM 0x%.8X. " + "Commit and move onto the next BPM", + nvdimmHuid); + } + ERRORLOG::errlCommit(errl, BPM_COMP_ID); + } + } + } + + if ( (i_32gb_BPMs != nullptr) + && (i_32gb_fwImage != nullptr) + && (i_32gb_configImage != nullptr)) + { + TRACFCOMP(g_trac_bpm, + "Check/update %d BPMs on 32GB_TYPE NVDIMMs", + i_32gb_BPMs->size()); + + for(auto& bpm : *i_32gb_BPMs) + { + errl = bpm.runUpdate(*i_32gb_fwImage, *i_32gb_configImage); + if (errl != nullptr) + { + uint32_t nvdimmHuid = TARGETING::get_huid(bpm.getNvdimm()); + if (bpm.attemptAnotherUpdate()) + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "An error occurred during a 32GB_TYPE BPM " + "update for NVDIMM 0x%.8X. " + "Try again.", + nvdimmHuid); + + delete errl; + errl = bpm.runUpdate(*i_32gb_fwImage, + *i_32gb_configImage); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "Another error occurred while attempting " + "to update the same 32GB_TYPE BPM for " + "NVDIMM 0x%.8X. 
Commit and move onto the " + "next BPM", + nvdimmHuid); + } + else + { + continue; + } + } + else + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "An error occurred during a 32GB_TYPE BPM " + "update for NVDIMM 0x%.8X. " + "Commit and move onto the next BPM", + nvdimmHuid); + } + ERRORLOG::errlCommit(errl, BPM_COMP_ID); + } + } + } + } while(0); +} + +// ============================================================================= +// BpmFirmwareLidImage Class Functions +// ============================================================================= + +BpmFirmwareLidImage::BpmFirmwareLidImage(void * const i_lidImageAddr, + size_t i_size) + : iv_lidImage(i_lidImageAddr), iv_lidImageSize(i_size) +{ + assert(i_lidImageAddr != nullptr, + "BPM::BpmFirmwareLidImage(): Provided LID image must not be nullptr"); +} + +uint16_t BpmFirmwareLidImage::getVersion() const +{ + uint16_t version = INVALID_VERSION; + + if (iv_lidImageSize >= sizeof(firmware_image_header_t)) + { + const firmware_image_header_t * header = + reinterpret_cast<const firmware_image_header_t*>(iv_lidImage); + + version = TWO_UINT8_TO_UINT16(header->iv_versionMajor, + header->iv_versionMinor); + } + + return version; +} + +uint16_t BpmFirmwareLidImage::getNumberOfBlocks() const +{ + uint16_t numberOfBlocks = 0; + + if (iv_lidImageSize >= sizeof(firmware_image_header_t)) + { + const firmware_image_header_t * header = + reinterpret_cast<const firmware_image_header_t*>(iv_lidImage); + + numberOfBlocks = header->iv_numberOfBlocks; + } + + return numberOfBlocks; +} + +void const * BpmFirmwareLidImage::getFirstBlock() const +{ + void * block = nullptr; + + if (getNumberOfBlocks() > 0) + { + block = reinterpret_cast<uint8_t* const>(iv_lidImage) + + sizeof(firmware_image_header_t); + } + + return block; +} + +// ============================================================================= +// BpmConfigLidImage Class Functions +// ============================================================================= + 
+BpmConfigLidImage::BpmConfigLidImage(void * const i_lidImageAddr, + size_t i_size) + : iv_lidImage(i_lidImageAddr), iv_lidImageSize(i_size) +{ + assert(i_lidImageAddr != nullptr, + "BPM::BpmConfigLidImage(): Provided LID image must not be nullptr"); +} + +uint16_t BpmConfigLidImage::getVersion() const +{ + uint16_t version = INVALID_VERSION; + + if (iv_lidImageSize >= sizeof(config_image_header_t)) + { + const config_image_header_t * header = + reinterpret_cast<const config_image_header_t*>(iv_lidImage); + + version = TWO_UINT8_TO_UINT16(header->iv_versionMajor, + header->iv_versionMinor); + } + + return version; +} + +uint16_t BpmConfigLidImage::getNumberOfFragments() const +{ + uint16_t numberOfFragments = 0; + + if (iv_lidImageSize >= sizeof(config_image_header_t)) + { + const config_image_header_t * header = + reinterpret_cast<const config_image_header_t*>(iv_lidImage); + + numberOfFragments = header->iv_numberOfFragments; + } + + return numberOfFragments; +} + +void const * BpmConfigLidImage::getFirstFragment() const +{ + void * fragment = nullptr; + + if (getNumberOfFragments() > 0) + { + fragment = reinterpret_cast<uint8_t* const>(iv_lidImage) + + sizeof(config_image_header_t); + } + + return fragment; +} + +// ============================================================================= +// Bpm Class Functions +// ============================================================================= + +Bpm::Bpm(const TARGETING::TargetHandle_t i_nvdimm) + : iv_nvdimm(i_nvdimm), + iv_bslVersion(0), + iv_firmwareStartAddress(0), + iv_attemptAnotherUpdate(false), + iv_segmentDMerged(false), + iv_segmentBMerged(false), + iv_updateAttempted(false) +{ + assert((i_nvdimm != nullptr) && (isNVDIMM(i_nvdimm)), + "BPM::Bpm(): An nvdimm target must be given."); + + memset(&iv_segmentD, 0, SEGMENT_SIZE); + memset(&iv_segmentB, 0, SEGMENT_SIZE); + +} + +bool Bpm::attemptAnotherUpdate() +{ + return iv_attemptAnotherUpdate; +} + +bool Bpm::hasAttemptedUpdate() +{ + return 
iv_updateAttempted; +} + +void Bpm::setAttemptAnotherUpdate() +{ + + if (iv_updateAttempted) + { + // Since iv_updateAttempted is true that means that this function was + // called on a subsequent update attempt, meaning we should no longer + // attempt updates if the current attempt fails. + iv_attemptAnotherUpdate = false; + } + else + { + // Since iv_updateAttempted is false that means that this function was + // called on the first update attempt because by default + // iv_updateAttempted is false and is only set to true as the last part + // of the update procedure. + iv_attemptAnotherUpdate = true; + } + +} + +const TARGETING::TargetHandle_t Bpm::getNvdimm() +{ + return iv_nvdimm; +} + +errlHndl_t Bpm::readBslVersion() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::readBslVersion()"); + errlHndl_t errl = nullptr; + + do { + // Enter Update mode + errl = enterUpdateMode(); + if (errl != nullptr) + { + break; + } + + // Verify in Update mode + errl = inUpdateMode(); + if (errl != nullptr) + { + break; + } + + // Enter Bootstrap Loader (BSL) mode + errl = enterBootstrapLoaderMode(); + if (errl != nullptr) + { + break; + } + + // Unlock the device. This is a BSL command so we must already be in + // BSL mode to execute it. + errl = unlockDevice(); + if (errl != nullptr) + { + break; + } + + // Command to get the version is a BSL command, so it has to be sent as + // a payload. + payload_t payload; + errl = setupPayload(payload, BSL_TX_BSL_VERSION, BPM_ADDRESS_ZERO); + if (errl != nullptr) + { + break; + } + + // Issue the BSL command + errl = issueCommand(BPM_PASSTHROUGH, + payload, + WRITE, + NO_DELAY_EXTERNAL_RESPONSE); + if (errl != nullptr) + { + break; + } + + // Get the result from the BPM. 
+ errl = getResponse(&iv_bslVersion, sizeof(uint8_t)); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::readBslVersion(): " + "Failed to determine BSL Version."); + break; + } + + TRACFCOMP(g_trac_bpm, "Bpm::readBslVersion(): BSL Version is 0x%X", + iv_bslVersion); + } while(0); + + // Reset the device. This will exit BSL mode. + errlHndl_t exitErrl = resetDevice(); + if (exitErrl != nullptr) + { + handleMultipleErrors(errl, exitErrl); + } + + // Exit update mode + exitErrl = exitUpdateMode(); + if (exitErrl != nullptr) + { + handleMultipleErrors(errl, exitErrl); + } + + return errl; +} + +errlHndl_t Bpm::getFwVersion(uint16_t & o_fwVersion) const +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::getFwVersion()"); + errlHndl_t errl = nullptr; + + do { + uint8_t bpmMajor = 0, bpmMinor = 0; + errl = nvdimmReadReg(iv_nvdimm, + ES_FWREV1, + bpmMajor); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::getFwVersion(): " + "Failed to read BPM major version byte"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + errl = nvdimmReadReg(iv_nvdimm, + ES_FWREV0, + bpmMinor); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::getFwVersion(): " + "Failed to read BPM minor version byte"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + o_fwVersion = TWO_UINT8_TO_UINT16(bpmMajor, bpmMinor); + + } while(0); + + return errl; +} + +errlHndl_t Bpm::issueCommand(const uint8_t i_bspCommand, + const uint8_t i_command, + const uint8_t i_opType, + const int i_msDelay) +{ + assert(isBspCommand(i_bspCommand), + "i_bspCommand must be a valid BSP command"); + assert(isBclCommand(i_command), + "i_command must be a valid BCL command"); + // i_opType gets set in the BPM_CMD_STATUS register where it is only given + // two bits. So any value above 3 is not valid. 
+ assert(i_opType <= 3, "i_opType can only range between 0 and 3"); + + errlHndl_t errl = nullptr; + + // i_command must be sent in BPM_REG_PAYLOAD_START, but it doesn't need to + // be formatted into a typical payload since the command isn't a BSL + // command. So, just create a payload_t, push_back the command, and let the + // issueCommand function that takes a payload_t parameter handle the rest. + payload_t payloadCommand; + payloadCommand.push_back(i_command); + + errl = issueCommand(i_bspCommand, payloadCommand, i_opType, i_msDelay); + + return errl; +} + +errlHndl_t Bpm::issueCommand(const uint8_t i_command, + payload_t i_payload, + const uint8_t i_opType, + const int i_msDelay) +{ + assert(isBspCommand(i_command), + "i_bspCommand must be a valid BSP command"); + + // i_opType gets set in the BPM_CMD_STATUS register where it is only given + // two bits. So any value above 3 is not valid. + assert(i_opType <= 3, "i_opType can only range between 0 and 3"); + + errlHndl_t errl = nullptr; + + do { + + // Check the full payload size to make sure it's not too large. Add the + // size of the SYNC_BYTE that was dropped during payload creation to + // verify that the full payload sent by the NVDIMM won't exceed the max + // size the BPM is able to receive. 
+ if ((i_payload.size() + SYNC_BYTE_SIZE) > MAX_PAYLOAD_SIZE) + { + uint8_t payloadSize = i_payload.size() + SYNC_BYTE_SIZE; + uint8_t payloadHeaderDataSize = + i_payload[PAYLOAD_HEADER_DATA_LENGTH_INDEX]; + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::issueCommand(): " + "payload size %d exceeds max payload size of %d", + payloadSize, MAX_PAYLOAD_SIZE); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_ISSUE_COMMAND + * @reasoncode BPM_RC::BPM_INVALID_PAYLOAD_SIZE + * @userdata1[00:31] Full Payload Size, including SYNC_BYTE + * @userdata1[32:63] MAX_PAYLOAD_SIZE + * @userdata2[00:31] Payload Header + Data size + * @userdata2[32:63] NVDIMM Target HUID associated with this BPM + * @devdesc The maximum payload size to be sent to the BPM + * was exceeded. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_ISSUE_COMMAND, + BPM_RC::BPM_INVALID_PAYLOAD_SIZE, + TWO_UINT16_TO_UINT32(payloadSize, + MAX_PAYLOAD_SIZE), + TWO_UINT32_TO_UINT64( + payloadHeaderDataSize, + TARGETING::get_huid(iv_nvdimm)) + ); + errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + // Load the payload + int i = 0; + for (const auto& byte : i_payload) + { + errl = nvdimmWriteReg(iv_nvdimm, + (BPM_REG_PAYLOAD_START + (i * sizeof(uint8_t))), + byte); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::issueCommand(): " + "Failed to write payload to BPM_REG_PAYLOAD_START"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + ++i; + } + if (errl != nullptr) + { + break; + } + + // Clear the error status register + errl = nvdimmWriteReg(iv_nvdimm, + BPM_REG_ERR_STATUS, + 0x00); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::issueCommand(): " + "Failed to clear error status register"); + 
errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Set the payload length. This is the actual length of the payload + // excluding the size of the SYNC_BYTE that was dropped during payload + // creation which is already missing from size(). + uint8_t data = i_payload.size(); + errl = nvdimmWriteReg(iv_nvdimm, + BPM_PAYLOAD_LENGTH, + data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::issueCommand(): " + "Failed to set payload length"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Setup the command status register + command_status_register_t commandStatus; + commandStatus.bits.Bsp_Cmd_In_Progress = 1; + commandStatus.bits.Operator_Type = i_opType; + errl = nvdimmWriteReg(iv_nvdimm, + BPM_CMD_STATUS, + commandStatus.value); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::issueCommand(): " + "Failed to setup the command status register"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Setup command type. The basically executes the command + errl = nvdimmWriteReg(iv_nvdimm, + BPM_REG_CMD, + i_command); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::issueCommand(): " + "Failed to set the command type. " + "The command was not issued to the BPM"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + errl = waitForCommandStatusBitReset(commandStatus); + if (errl != nullptr) + { + break; + } + + // If a delay was given then wait for the delay and check the response. + // Otherwise, do not wait and do not check the response. For a list of + // commands and delays, see bpm_update.H for more info. + if (i_msDelay > 0) + { + // Wait the given time in ms. Default 1ms for most commands. + nanosleep(0, i_msDelay * NS_PER_MSEC); + + // Check the response from the BPM. A non-zero response value + // indicates failure. So, assume a failure and check for success. 
+ uint8_t data = 0xFF; + errl = getResponse(&data, sizeof(uint8_t)); + if (errl != nullptr) + { + break; + } + + // If the data read from the response is a non-zero value then the + // issued command failed. + if (data != 0) + { + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_ISSUE_COMMAND + * @reasoncode BPM_RC::BPM_BAD_RESPONSE + * @userdata1 The command that failed to execute. + * See bpm_update.H for list of commands. + * @userdata2 NVDIMM Target HUID associated with this BPM + * @devdesc The command sent to the BPM failed. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_ISSUE_COMMAND, + BPM_RC::BPM_BAD_RESPONSE, + i_payload[PAYLOAD_COMMAND_INDEX], + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::runUpdate(BpmFirmwareLidImage i_fwImage, + BpmConfigLidImage i_configImage) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::runUpdate(): " + "Running BPM Update for NVDIMM 0x%.8X", + TARGETING::get_huid(iv_nvdimm)); + + errlHndl_t errl = nullptr; + // Assume an update is necessary for the BPM and determine if it isn't. + bool shouldPerformUpdate = true; + + // Get the sys target to check for attribute overrides. 
+ TARGETING::Target* sys = nullptr; + TARGETING::targetService().getTopLevelTarget(sys); + + auto updateOverride = + sys->getAttr<TARGETING::ATTR_BPM_UPDATE_OVERRIDE>(); + uint16_t firmwareOverrideFlag = (updateOverride & 0xFF00); + uint16_t configOverrideFlag = (updateOverride & 0x00FF); + + do { + + // First check if there is a BPM connected + errl = verifyGoodBpmState(); + if (errl != nullptr) + { + // Either there isn't a BPM connected to this NVDIMM or it's not + // functional. Don't bother with updates. + shouldPerformUpdate = false; + iv_attemptAnotherUpdate = false; + break; + } + + // Check the version on the BPM against the version in the image. + uint16_t bpmFwVersion = INVALID_VERSION; + errl = getFwVersion(bpmFwVersion); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runUpdate(): " + "Could not determine firmware version on BPM " + "Skipping update."); + break; + } + + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "Firmware version on the BPM 0x%.4X, " + "Firmware version of image 0x%.4X.", + bpmFwVersion, i_fwImage.getVersion()); + + if (i_fwImage.getVersion() == bpmFwVersion) + { + shouldPerformUpdate = false; + if (updateOverride == TARGETING::BPM_UPDATE_BEHAVIOR_DEFAULT_ALL) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "Firmware version on the BPM matches the version in " + "the image. Skipping update."); + break; + } + } + + if (updateOverride == TARGETING::BPM_UPDATE_BEHAVIOR_SKIP_ALL) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "ATTR_BPM_UPDATE_OVERRIDE set to SKIP_ALL. " + "Skipping update."); + break; + } + + // Depending on the BSL version a CRC check may be necessary + errl = readBslVersion(); + if (errl != nullptr) + { + break; + } + + // If the BSL version read from the BPM isn't a supported version then + // don't perform the updates as the update flow may have changed between + // BSL versions. 
+ if (iv_bslVersion != BSL_VERSION_1_4) + { + TRACFCOMP(g_trac_bpm, "Bpm::runUpdate(): " + "Unsupported BSL Version 0x%.2X detected on BPM. " + "Cancelling Update."); + + break; + } + + if ((shouldPerformUpdate + || (firmwareOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_FORCE_FW)) + && !(firmwareOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_SKIP_FW)) + { + if (firmwareOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_FORCE_FW) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "ATTR_BPM_UPDATE_OVERRIDE set to force firmware " + "portion of BPM updates. Running Firmware Update..."); + } + + errl = runFirmwareUpdates(i_fwImage); + if (errl != nullptr) + { + break; + } + } + else + { + if (firmwareOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_SKIP_FW) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "ATTR_BPM_UPDATE_OVERRIDE set to skip firmware " + "portion of BPM updates. Skipping Firmware Update..."); + } + else + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "Firmware Data on BPM already up-to-date. " + "Skipping Firmware Update..."); + } + } + + if ((shouldPerformUpdate + || (configOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_FORCE_CONFIG)) + && !(configOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_SKIP_CONFIG)) + { + if (configOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_FORCE_CONFIG) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "ATTR_BPM_UPDATE_OVERRIDE set to force config " + "portion of BPM updates. Running Config Update..."); + } + errl = runConfigUpdates(i_configImage); + if (errl != nullptr) + { + break; + } + } + else + { + if (configOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_SKIP_CONFIG) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "ATTR_BPM_UPDATE_OVERRIDE set to skip config " + "portion of BPM updates. Skipping Config Update..."); + } + else + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "Configuration Data on BPM already up-to-date. 
" + "Skipping Config Update..."); + } + } + + } while(0); + + if ((shouldPerformUpdate + || (configOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_FORCE_CONFIG) + || (firmwareOverrideFlag == TARGETING::BPM_UPDATE_BEHAVIOR_FORCE_FW)) + && (updateOverride != TARGETING::BPM_UPDATE_BEHAVIOR_SKIP_ALL)) + { + // Reset controller and unlock encryption if necessary + errlHndl_t exitErrl = nvdimmResetController(iv_nvdimm); + if (exitErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runUpdate() " + "Couldn't reset NVDIMM controller."); + handleMultipleErrors(errl, exitErrl); + } + + // If the update was successful then we must wait for 15 seconds before + // polling the status of the BPM since it has to finish updating its + // firmware and resetting. + TRACFCOMP(g_trac_bpm, "Bpm::runUpdate(): " + "Wait for the BPM to finish update and reset procedure, " + "sleep for 15 seconds"); + longSleep(15); + + // Poll SCAP_STATUS register for BPM state before we check final + // firmware version. + exitErrl = verifyGoodBpmState(); + if (exitErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runUpdate(): " + "Could not verify that BPM was present and enabled!"); + handleMultipleErrors(errl, exitErrl); + } + + uint16_t bpmFwVersion = INVALID_VERSION; + exitErrl = getFwVersion(bpmFwVersion); + if (exitErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runUpdate(): " + "Could not determine firmware version on the BPM"); + handleMultipleErrors(errl, exitErrl); + } + + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "Firmware version on the BPM 0x%.4X, " + "Firmware version of image 0x%.4X.", + bpmFwVersion, i_fwImage.getVersion()); + + if (i_fwImage.getVersion() == bpmFwVersion) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::runUpdate(): " + "Firmware version on the BPM matches the version in the " + "image. Firmware Update Successful."); + iv_attemptAnotherUpdate = false; + } + else + { + // Attempt another update if one hasn't already been attempted. 
+ setAttemptAnotherUpdate(); + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runUpdate(): " + "Version on BPM didn't match image. %s ", + iv_attemptAnotherUpdate ? + "Attempt another update..." + : "Attempts to update the BPM have failed."); + if (iv_attemptAnotherUpdate == false) + { + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE + * @moduleid BPM_RC::BPM_RUN_FW_UPDATES + * @reasoncode BPM_RC::BPM_VERSION_MISMATCH + * @userdata1[00:31] Version on the BPM + * @userdata1[32:63] Version of the flash image + * @userdata2 NVDIMM Target HUID associated with this BPM + * @devdesc The version on the BPM didn't match the + * version in the flash image. + * @custdesc A problem occurred during IPL of the system. + */ + exitErrl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + BPM_RC::BPM_RUN_FW_UPDATES, + BPM_RC::BPM_VERSION_MISMATCH, + TWO_UINT32_TO_UINT64(bpmFwVersion, + i_fwImage.getVersion()), + TARGETING::get_huid(iv_nvdimm)); + exitErrl->collectTrace(BPM_COMP_NAME); + handleMultipleErrors(errl, exitErrl); + } + } + + TRACFCOMP(g_trac_bpm, EXIT_MRK"Bpm::runUpdate(): " + "Concluding BPM Update for NVDIMM 0x%.8X %s", + TARGETING::get_huid(iv_nvdimm), + (errl != nullptr) ? "with errors" : "without errors"); + } + + // An update has been attempted at least once. Set member variable to true + // to dictate future update attempts. This variable should only be set at + // the end of the update procedure in order to properly control future + // update attempts. + iv_updateAttempted = true; + + if (errl == nullptr) + { + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_INFORMATIONAL + * @moduleid BPM_RC::BPM_RUN_UPDATE + * @reasoncode BPM_RC::BPM_UPDATE_SUCCESSFUL + * @userdata1 NVDIMM Target HUID associated with this BPM + * @devdesc BPM Update finished without errors. + * @custdesc Informational log associated with DIMM updates. 
+ */ + errlHndl_t infoErrl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_INFORMATIONAL, + BPM_RC::BPM_RUN_UPDATE, + BPM_RC::BPM_UPDATE_SUCCESSFUL, + TARGETING::get_huid(iv_nvdimm)); + infoErrl->collectTrace(BPM_COMP_NAME); + ERRORLOG::errlCommit(infoErrl, BPM_COMP_ID); + } + + return errl; +} + +errlHndl_t Bpm::inUpdateMode() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::inUpdateMode()"); + errlHndl_t errl = nullptr; + + do { + + errl = issueCommand(BPM_LOCAL, + BCL_IS_UPDATE_IN_PROGRESS, + READ, + NO_DELAY_NO_RESPONSE); + if (errl != nullptr) + { + break; + } + + uint8_t isUpdateInProgress = 0; + errl = nvdimmReadReg(iv_nvdimm, + BPM_REG_ERR_STATUS, + isUpdateInProgress); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::inUpdateMode(): " + "Failed to read error status register"); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + if (!isUpdateInProgress) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::inUpdateMode(): " + "Failed to enter update mode"); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_IN_UPDATE_MODE + * @reasoncode BPM_RC::BPM_UPDATE_MODE_VERIFICATION_FAIL + * @userdata1 NVDIMM Target HUID associated with this BPM + * @devdesc Failed to verify update mode was entered using + * the BSL interface. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_IN_UPDATE_MODE, + BPM_RC::BPM_UPDATE_MODE_VERIFICATION_FAIL, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::enterUpdateMode() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::enterUpdateMode()"); + errlHndl_t errl = nullptr; + + do { + + // Disable write protection on the BPM. 
Otherwise, we can't write the + // magic values that enable the nvdimm-bpm interface. + errl = disableWriteProtection(); + if (errl != nullptr) + { + break; + } + + // Write the magic values to enable nvdimm-bpm interface + errl = writeToMagicRegisters(UPDATE_MODE_MAGIC_VALUES); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::enterUpdateMode(): " + "Failed to write magic numbers that enable " + "update mode"); + break; + } + + TRACFCOMP(g_trac_bpm, "Bpm::enterUpdateMode(): " + "Issuing BPM_LOCAL BCL_START_UPDATE command."); + + errl = issueCommand(BPM_LOCAL, + BCL_START_UPDATE, + WRITE, + NO_DELAY_NO_RESPONSE); + if (errl != nullptr) + { + break; + } + + nanosleep(2,0); + + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_INFORMATIONAL + * @moduleid BPM_RC::BPM_START_UPDATE + * @reasoncode BPM_RC::BPM_ENTER_UPDATE_MODE + * @userdata1 NVDIMM Target HUID associated with this BPM + * @devdesc BPM has entered update mode. + * @custdesc Informational log associated with DIMM updates. 
+ */ + errlHndl_t infoErrl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_INFORMATIONAL, + BPM_RC::BPM_START_UPDATE, + BPM_RC::BPM_ENTER_UPDATE_MODE, + TARGETING::get_huid(iv_nvdimm)); + infoErrl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + infoErrl->collectTrace(BPM_COMP_NAME); + nvdimmAddVendorLog(iv_nvdimm, infoErrl); + nvdimmAddPage4Regs(iv_nvdimm, infoErrl); + ERRORLOG::errlCommit(infoErrl, BPM_COMP_ID); + + } while(0); + + return errl; +} + +errlHndl_t Bpm::exitUpdateMode() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::exitUpdateMode()"); + errlHndl_t errl = nullptr; + + do { + + errl = writeToMagicRegisters(UPDATE_MODE_MAGIC_VALUES); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::exitUpdateMode(): " + "Failed to write the update magic values to " + " be able to send BPM_LOCAL commands."); + break; + } + + errl = issueCommand(BPM_LOCAL, + BCL_IS_UPDATE_IN_PROGRESS, + READ, + NO_DELAY_NO_RESPONSE); + if (errl != nullptr) + { + break; + } + + uint8_t isUpdateInProgress = 0; + errl = nvdimmReadReg(iv_nvdimm, + BPM_REG_ERR_STATUS, + isUpdateInProgress); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::exitUpdateMode(): " + "Failed to read BPM_REG_ERR_STATUS register to determine " + "if BPM is in update mode."); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Sending the exit update command when the BPM isn't in update mode can + // cause unpredicatable behavior and errors. + if (isUpdateInProgress) + { + errl = issueCommand(BPM_LOCAL, + BCL_END_UPDATE, + WRITE, + NO_DELAY_NO_RESPONSE); + if (errl != nullptr) + { + break; + } + } + else + { + TRACFCOMP(g_trac_bpm, "Bpm::exitUpdateMode(): " + "Not in update mode. 
" + "Exit update command will not be sent."); + } + + // Write back the production magic values + errl = writeToMagicRegisters(PRODUCTION_MAGIC_VALUES); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::exitUpdateMode(): " + "Failed to write the production magic values to " + "disable update mode."); + break; + } + + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_INFORMATIONAL + * @moduleid BPM_RC::BPM_END_UPDATE + * @reasoncode BPM_RC::BPM_EXIT_UPDATE_MODE + * @userdata1 NVDIMM Target HUID associated with this BPM + * @devdesc BPM has exited update mode. + * @custdesc Informational log associated with DIMM updates. + */ + errlHndl_t infoErrl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_INFORMATIONAL, + BPM_RC::BPM_END_UPDATE, + BPM_RC::BPM_EXIT_UPDATE_MODE, + TARGETING::get_huid(iv_nvdimm)); + infoErrl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + infoErrl->collectTrace(BPM_COMP_NAME); + nvdimmAddVendorLog(iv_nvdimm, infoErrl); + nvdimmAddPage4Regs(iv_nvdimm, infoErrl); + ERRORLOG::errlCommit(infoErrl, BPM_COMP_ID); + + } while(0); + + return errl; +} + +errlHndl_t Bpm::updateFirmware(BpmFirmwareLidImage i_image) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::updateFirmware()"); + errlHndl_t errl = nullptr; + + // The reset vector address is near the end of the firmware section. + // We must do a special operation on it when it shows up during the update. + const uint16_t RESET_VECTOR_ADDRESS = 0xFFFE; + + bool mainAddressEncountered = false; + + // Get the number of blocks in the image + const uint16_t NUMBER_OF_BLOCKS = i_image.getNumberOfBlocks(); + + char const * data = + reinterpret_cast<char const *>(i_image.getFirstBlock()); + + firmware_image_block_t const * block = + reinterpret_cast<firmware_image_block_t const *> + (data); + + for(size_t i = 0; i < NUMBER_OF_BLOCKS; ++i) + { + // This is done once at the main program address. 
+ if ( ((block->iv_addressOffset == MAIN_PROGRAM_ADDRESS) + || (block->iv_addressOffset == MAIN_PROGRAM_ADDRESS_ALT)) + && !mainAddressEncountered) + { + // Only execute this once. + mainAddressEncountered = true; + + // Save the firmware start address for later. This will be needed + // for the final CRC check when the update is completed. + iv_firmwareStartAddress = block->iv_addressOffset; + + payload_t payload; + errl = setupPayload(payload, + BSL_MASS_ERASE, + iv_firmwareStartAddress); + if (errl != nullptr) + { + break; + } + + errl = issueCommand(BPM_PASSTHROUGH, + payload, + WRITE, + ERASE_FIRMWARE_DELAY); + if (errl != nullptr) + { + break; + } + + TRACFCOMP(g_trac_bpm, "Bpm::updateFirmware(): " + "Performing BSL_MASS_ERASE on BPM, sleep for 5 seconds."); + longSleep(5); + + TRACFCOMP(g_trac_bpm, "Bpm::updateFirmware(): " + "Begin writing flash image to BPM " + "with a starting address of 0x%.4X", + iv_firmwareStartAddress); + + } + + if (block->iv_addressOffset % 0x400 == 0) + { + TRACFCOMP(g_trac_bpm, "Bpm::updateFirmware(): " + "Writing to address offset 0x%.4X. " + "Firmware blocks written: %d; Remaining: %d", + block->iv_addressOffset, + i, (NUMBER_OF_BLOCKS - i)); + } + + // Construct the payload for this block in the image + payload_t payload; + errl = setupPayload(payload, block, BSL_RX_DATA_BLOCK); + if (errl != nullptr) + { + break; + } + + if (block->iv_addressOffset == RESET_VECTOR_ADDRESS) + { + TRACFCOMP(g_trac_bpm, "Bpm::updateFirmware(): " + "Encountered RESET_VECTOR_ADDRESS 0x%.4X. " + "Attempt to write RESET_VECTOR to BPM up to %d times.", + RESET_VECTOR_ADDRESS, + MAX_RETRY); + // Attempting to BSL_VERIFY_BLOCK on the reset vector data will + // fail. To verify that this data is written correctly we will check + // the response packet sent by the BPM. + const uint8_t RESET_VECTOR_RECEIVE_SUCCESS = 0x80; + uint8_t retry = 1; + do + { + // Issue the write command to the BPM. 
+ // The RESET_VECTOR is special in that its response is checked + // externally. + errl = issueCommand(BPM_PASSTHROUGH, + payload, + WRITE, + NO_DELAY_EXTERNAL_RESPONSE); + if (errl != nullptr) + { + break; + } + + // Get the response packet and verify that the status is + // RESET_VECTOR_RECEIVE_SUCCESS. + // + // Any status besides RESET_VECTOR_RECEIVE_SUCCESS is considered + // a fail. So, assume a failure and check. + uint8_t status = 0xFF; + errl = getResponse(&status, + sizeof(uint8_t)); + if (errl != nullptr) + { + break; + } + + if (status != RESET_VECTOR_RECEIVE_SUCCESS) + { + TRACFCOMP(g_trac_bpm, "Bpm::updateFirmware(): " + "status %d from BPM was not " + "RESET_VECTOR_RECEIVE_SUCCESS value of %d. " + "Retrying...", + status, + RESET_VECTOR_RECEIVE_SUCCESS); + + if (++retry > MAX_RETRY) + { + TRACFCOMP(g_trac_bpm, "Bpm::updateFirmware(): " + "Never received RESET_VECTOR_RECEIVE_SUCCESS " + "status from BPM in three attempts. " + "Aborting Update"); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_UPDATE_FIRMWARE + * @reasoncode BPM_RC::BPM_RESET_VECTOR_NEVER_RECEIVED + * @userdata1 NVDIMM Target HUID associated with this BPM + * @devdesc RESET_VECTOR_RECEIVE_SUCCESS status was not + * received in three attempts. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_UPDATE_FIRMWARE, + BPM_RC::BPM_RESET_VECTOR_NEVER_RECEIVED, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + + // Change the state of iv_attemptAnotherUpdate to signal + // if another update attempt should occur. + setAttemptAnotherUpdate(); + + break; + } + } + else + { + // RESET_VECTOR was written and received successfully. + // Exit retry loop. 
+ break; + } + + // Sleep for 0.001 second before attempting again. + nanosleep(0, 1 * NS_PER_MSEC); + + } while(retry <= MAX_RETRY); + if (errl != nullptr) + { + break; + } + } + else + { + // Attempt to write the data using a retry loop. This will also + // verify that the data was correctly written to the BPM. + errl = blockWrite(payload); + if (errl != nullptr) + { + break; + } + } + + // Move to the next block + // iv_blocksize doesn't include the sizeof itself. So, add another byte + // for it. + data += block->iv_blockSize + sizeof(uint8_t); + block = reinterpret_cast<firmware_image_block_t const *>(data); + } + + TRACFCOMP(g_trac_bpm, EXIT_MRK"Bpm::updateFirmware(): " + "Firmware flash image write and verification completed " + "%s", + (errl == nullptr) ? "without errors" : "with errors"); + + return errl; +} + +errlHndl_t Bpm::updateConfig() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::updateConfig()"); + errlHndl_t errl = nullptr; + + do { + + // Erase Segment D on the BPM via the BSL interface. + errl = eraseSegment(SEGMENT_D_CODE); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::updateConfig(): " + "Failed to erase Segment D."); + break; + } + + // Write the updated Segment D buffer to the BPM via the BSL interface. + errl = writeSegment(iv_segmentD, SEGMENT_D_CODE); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::updateConfig(): " + "Failed to write Segment D."); + break; + } + + + // Erase Segment B on the BPM via the BSL interface. + errl = eraseSegment(SEGMENT_B_CODE); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::updateConfig(): " + "Failed to erase Segment B."); + break; + } + + // Write the updated Segment B buffer to the BPM via the BSL interface. 
+ errl = writeSegment(iv_segmentB, SEGMENT_B_CODE); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::updateConfig(): " + "Failed to write Segment B."); + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::enterBootstrapLoaderMode() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::enterBootstrapLoaderMode()"); + errlHndl_t errl = nullptr; + + do { + + // Entering BSL mode depends on the state of the BPM and it may need + // several retries in order to successfully enter BSL mode. + int retry = 5; + bool inBslMode = false; + + while (retry != 0) + { + + errl = issueCommand(BPM_LOCAL, + BCL_IS_BSL_MODE, + WRITE, + NO_DELAY_NO_RESPONSE); + if (errl != nullptr) + { + break; + } + + uint8_t data = 0; + errl = nvdimmReadReg(iv_nvdimm, + BPM_REG_ERR_STATUS, + data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::enterBootstrapLoaderMode(): " + "Failed to read BPM_REG_ERR_STATUS to verify that " + "BSL mode was enabled."); + errl->collectTrace(BPM_COMP_NAME); + break; + } + // data will be 1 if the BPM successfully entered BSL mode. + if (data == 1) + { + inBslMode = true; + TRACFCOMP(g_trac_bpm, "Bpm::enterBootstrapLoaderMode(): " + "BSL Mode entered, sleep for 5 seconds."); + longSleep(5); + break; + } + + // Sleep for 0.001 second. + nanosleep(0, 1 * NS_PER_MSEC); + + errl = issueCommand(BPM_LOCAL, + BCL_ENTER_BSL_MODE, + WRITE, + NO_DELAY_NO_RESPONSE); + if (errl != nullptr) + { + break; + } + + TRACUCOMP(g_trac_bpm, "Bpm::enterBootstrapLoaderMode(): " + "Unable to enter BSL Mode, retries remaining %d. 
" + "Sleep for 2 seconds before trying again.", + (retry - 1)); + nanosleep(2,0); + --retry; + + } + if (errl != nullptr) + { + break; + } + + if (!inBslMode) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::enterBootstrapLoaderMode(): " + "Failed to enter BSL mode on the BPM"); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_ENTER_BSL_MODE + * @reasoncode BPM_RC::BPM_FAILED_TO_ENTER_BSL_MODE + * @userdata1[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc Failed to enter BSL mode after several attempts. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_ENTER_BSL_MODE, + BPM_RC::BPM_FAILED_TO_ENTER_BSL_MODE, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddVendorLog(iv_nvdimm, errl); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::setupPayload(payload_t & o_payload, + const uint8_t i_command, + const uint16_t i_address, + const uint8_t i_data[], + const size_t i_length) +{ + // Enforce sane inputs + assert(( (i_data == nullptr && i_length == 0) + || (i_data != nullptr && i_length != 0)), + "if i_length is non-zero then i_data must not be nullptr, otherwise i_data must be nullptr."); + assert(isBslCommand(i_command), + "i_command must be a valid BSL command"); + + errlHndl_t errl = nullptr; + + // Calculate the block size. + size_t blockSize = sizeof(uint16_t) + i_length; + + // Allocate memory for the block + firmware_image_block_t* myBlock = reinterpret_cast<firmware_image_block_t*>( + malloc(sizeof(firmware_image_block_t) + i_length)); + + // Setup the block "header" info + myBlock->iv_blockSize = blockSize; + myBlock->iv_addressOffset = i_address; + + // Copy the data if any exists. 
+ if (i_data != nullptr) + { + memcpy(&myBlock->iv_data, i_data, i_length); + } + + // Setup the return payload + errl = setupPayload(o_payload, myBlock, i_command); + + // Block is no longer needed. + free(myBlock); + + return errl; +} + +errlHndl_t Bpm::setupPayload(payload_t & o_payload, + const firmware_image_block_t * i_block, + const uint8_t i_command) +{ + assert(i_block != nullptr, "i_block must not be nullptr."); + assert(isBslCommand(i_command), + "i_command must be a valid BSL command"); + + errlHndl_t errl = nullptr; + + // The data size in the block is the total block size + // minus the 2 bytes for the address offset. + const uint8_t blockDataSize = i_block->iv_blockSize - sizeof(uint16_t); + + // The header plus payload data section size. This excludes the address + // offset, extra bytes, and CRC bytes. + const uint8_t headerDataSize = PAYLOAD_HEADER_SIZE + blockDataSize; + + do { + + if (blockDataSize > MAX_PAYLOAD_DATA_SIZE) + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "Bpm::setupPayload(): Block Data Size %d exceeds max payload " + "size of %d", + blockDataSize, + MAX_PAYLOAD_DATA_SIZE); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_SETUP_PAYLOAD + * @reasoncode BPM_RC::BPM_INVALID_PAYLOAD_DATA_SIZE + * @userdata1[0:7] Block Data Size + * @userdata1[8:15] MAX_PAYLOAD_DATA_SIZE + * @userdata2[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc Failed to enter BSL mode after several attempts. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_SETUP_PAYLOAD, + BPM_RC::BPM_INVALID_PAYLOAD_DATA_SIZE, + TWO_UINT8_TO_UINT16(blockDataSize, + MAX_PAYLOAD_DATA_SIZE), + TARGETING::get_huid(iv_nvdimm)); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + // Create the payload with the exact size needed. 
+ payload_t payload(MAX_PAYLOAD_OTHER_DATA_SIZE + blockDataSize); + + // Instead of using push_back, use a pointer to an element in the vector. + // Since the size of the vector is declared and initialized to zero ahead of + // time push_back will not work. Also, some of the data is larger than + // uint8_t and so it's easier to just use memcpy for insertion. + // NOTE: Because push_back isn't being used the size() of the vector doesn't + // change along with the data being added to the vector. This was + // corrected by explicitly setting the payload size in the constructor + // call above. + uint8_t * payloadIterator = payload.data(); + + // According to SMART, we must supply the header + data size twice. + uint8_t header[PAYLOAD_HEADER_SIZE] = { SYNC_BYTE, + i_command, + headerDataSize, + headerDataSize }; + + memcpy(payloadIterator, &header, PAYLOAD_HEADER_SIZE); + + // Move past the header + payloadIterator += PAYLOAD_HEADER_SIZE; + + // Write the address offset in little endian form. + uint16_t addressLE = htole16(i_block->iv_addressOffset); + uint8_t* addressOffset = reinterpret_cast<uint8_t*>(&addressLE); + memcpy(payloadIterator, addressOffset, sizeof(uint16_t)); + + // Move past the address + payloadIterator += sizeof(uint16_t); + + // The extra bytes vary based on the given command. + // These are the extra bytes for their corresponding bootstrap loader + // commands. They are arranged in little endian form so that no byte + // swapping is required. + const uint8_t BSL_ERASE_SEGMENT_EXTRA_BYTES[] = {0x02, 0xA5}; + const uint8_t BSL_MASS_ERASE_EXTRA_BYTES[] = {0x06, 0xA5}; + switch(i_command) + { + case BSL_ERASE_SEGMENT: + { + memcpy(payloadIterator, + &BSL_ERASE_SEGMENT_EXTRA_BYTES, + sizeof(uint16_t)); + + break; + } + case BSL_MASS_ERASE: + { + memcpy(payloadIterator, + &BSL_MASS_ERASE_EXTRA_BYTES, + sizeof(uint16_t)); + break; + } + default: + { + // Give the size of the data section as a uint16_t in little + // endian form. 
+ uint8_t dataLength[] = {blockDataSize, 0x0}; + memcpy(payloadIterator, &dataLength, sizeof(uint16_t)); + break; + } + } + + // Move past the payload's extra bytes. + payloadIterator += sizeof(uint16_t); + + if (blockDataSize > 0) + { + // Copy the payload data from the LID image block to the payload's data + // section. + memcpy(payloadIterator, &i_block->iv_data, blockDataSize); + + // Move past the payload's data section. + payloadIterator += blockDataSize; + } + + // Calculate the CRC bytes + // Pass in the size of the payload excluding the two reserved bytes + // for the CRC. + uint16_t crc = htole16(crc16_calc(payload.data(), payload.size()-2)); + + // Write the CRC bytes + uint8_t* crcBytes = reinterpret_cast<uint8_t*>(&crc); + memcpy(payloadIterator, crcBytes, sizeof(uint16_t)); + + // The sync byte is automatically sent by the NVDIMM to the BPM so + // including it in the payload isn't necessary. It is only needed to + // calculate the CRC bytes. + payload.erase(payload.begin()); + // Force the returned payload to have the exact capacity and size of the + // payload. + o_payload.swap(payload); + + } while(0); + + return errl; +} + +errlHndl_t Bpm::unlockDevice() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::unlockDevice()"); + errlHndl_t errl = nullptr; + + do { + + // This is a BSL command, so it must be formatted into a payload. + payload_t payload; + + // This command must send the password in order to unlock the device. + errl = setupPayload(payload, + BSL_RX_PASSWORD, + BPM_ADDRESS_ZERO, + BPM_PASSWORD, + BPM_PASSWORD_LENGTH); + if (errl != nullptr) + { + break; + } + + errl = issueCommand(BPM_PASSTHROUGH, payload, WRITE); + if (errl != nullptr) + { + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::resetDevice() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::resetDevice()"); + errlHndl_t errl = nullptr; + + do { + + // Verify we are in BSL mode by checking SCAP_STATUS because if we aren't + // then we don't need to do anything. 
+ scap_status_register_t status; + errl = nvdimmReadReg(iv_nvdimm, + SCAP_STATUS, + status.full); + if (errl != nullptr) + { + errl->collectTrace(BPM_COMP_NAME); + break; + } + + if (status.bit.Bpm_Bsl_Mode) + { + // This is a BSL command, so it must be formatted into a payload. + payload_t payload; + errl = setupPayload(payload, BSL_RESET_DEVICE, BPM_ADDRESS_ZERO); + if (errl != nullptr) + { + break; + } + + // Despite this being a BSL command we cannot check the response + // because the BPM will either be offline and cannot respond or + // the command will have completed and we won't be in BSL mode + // anymore and therefore shouldn't check the response. + errl = issueCommand(BPM_PASSTHROUGH, + payload, + WRITE, + NO_DELAY_NO_RESPONSE); + if (errl != nullptr) + { + break; + } + + // If we wait less than 15 seconds for the reset to occur it is + // possible that BPM won't be ready for more commands via the NVDIMM + TRACFCOMP(g_trac_bpm, "Bpm::resetDevice(): " + "Resetting BPM for NVDIMM 0x%.8X, sleep for 15 seconds.", + TARGETING::get_huid(iv_nvdimm)); + longSleep(15); + } + else + { + TRACFCOMP(g_trac_bpm, "Bpm::resetDevice(): " + "Not in BSL Mode. Don't send the reset command."); + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::readViaScapRegister(uint8_t const i_reg, uint8_t & io_data) +{ + TRACUCOMP(g_trac_bpm, ENTER_MRK"Bpm::readViaScapRegister()"); + errlHndl_t errl = nullptr; + + do { + + // Wait for the SCAP_STATUS Busy bit to be zero. + errl = waitForBusyBit(); + if (errl != nullptr) + { + break; + } + + // Write to SCAP register which register we're attempting to access on + // the BPM + errl = nvdimmWriteReg(iv_nvdimm, + SCAP_REG, + i_reg); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::readViaScapRegister(): " + "Failed to set SCAP_REG to register 0x%.2X", + i_reg); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Wait for the SCAP_STATUS Busy bit to be zero. 
+ errl = waitForBusyBit(); + if (errl != nullptr) + { + break; + } + + // Read out the data from the requested register + errl = nvdimmReadReg(iv_nvdimm, + SCAP_DATA, + io_data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "BPM::readViaScapRegister(): " + "Failed to read data from SCAP_DATA for register 0x%.2X.", + i_reg); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::writeViaScapRegister(uint8_t const i_reg, uint8_t const i_data) +{ + TRACUCOMP(g_trac_bpm, ENTER_MRK"Bpm::writeViaScapRegister()"); + errlHndl_t errl = nullptr; + + do { + + // The SCAP_REG and SCAP_DATA registers require a few retries to get the + // values to stick. This loop sets SCAP_REG to i_reg + uint8_t retry = 0; + uint8_t data = 0; + do { + + // Wait for the SCAP_STATUS Busy bit to be zero. + errl = waitForBusyBit(); + if (errl != nullptr) + { + break; + } + + // Write to SCAP register which register we're attempting to access + // on the BPM + errl = nvdimmWriteReg(iv_nvdimm, + SCAP_REG, + i_reg); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::writeViaScapRegister(): " + "Failed to set SCAP_REG to register 0x%.2X", + i_reg); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Wait for the SCAP_STATUS Busy bit to be zero. + errl = waitForBusyBit(); + if (errl != nullptr) + { + break; + } + + // Wait 100ms + nanosleep(0, 100 * NS_PER_MSEC); + + errl = nvdimmReadReg(iv_nvdimm, + SCAP_REG, + data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "BPM::writeViaScapRegister(): " + "Failed to read from SCAP_REG to verify that " + "requested register 0x%.2X was written successfully.", + i_reg); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + if (data == i_reg) + { + TRACUCOMP(g_trac_bpm, "Bpm::writeViaScapRegister(): " + "REG 0x%X was successfully written to SCAP_REG 0x434. 
" + "Stop retries.", + i_reg); + break; + } + + } while(++retry < MAX_RETRY); + if (errl != nullptr) + { + break; + } + if ((retry >= MAX_RETRY) && (data != i_reg)) + { + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_WRITE_VIA_SCAP + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT_REG + * @userdata1[0:31] The register that we were attempting to write to + * SCAP_REG. + * @userdata1[32:63] The data that was found in the register on the + * final attempt. + * @userdata2 NVDIMM Target HUID associated with this BPM + * @devdesc The command sent to the BPM failed. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_WRITE_VIA_SCAP, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT_REG, + TWO_UINT32_TO_UINT64(i_reg, + data), + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + // The SCAP_REG and SCAP_DATA registers require a few retries to get the + // values to stick. This loop sets SCAP_DATA to i_data + retry = 0; + data = 0; + do { + + // Wait for the SCAP_STATUS Busy bit to be zero. + errl = waitForBusyBit(); + if (errl != nullptr) + { + break; + } + + // Write the data to the register we're attempting to access + // on the BPM. + errl = nvdimmWriteReg(iv_nvdimm, + SCAP_DATA, + i_data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"BPM::writeViaScapRegister(): " + "Failed to write data 0x%.2X to SCAP_DATA for " + "register 0x%.2X.", + i_data, + i_reg); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Wait for the SCAP_STATUS Busy bit to be zero. 
+ errl = waitForBusyBit(); + if (errl != nullptr) + { + break; + } + + // Wait 100ms + nanosleep(0, 100 * NS_PER_MSEC); + + errl = nvdimmReadReg(iv_nvdimm, + SCAP_DATA, + data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"BPM::writeViaScapRegister(): " + "Failed to read from SCAP_DATA to verify " + "that requested data 0x%.2X was written successfully.", + i_data); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + if (data == i_data) + { + TRACUCOMP(g_trac_bpm, "Bpm::writeViaScapRegister(): " + "DATA 0x%X was successfully written to SCAP_DATA 0x435." + " Stop retries.", + i_data); + break; + } + + } while(++retry < MAX_RETRY); + if (errl != nullptr) + { + break; + } + if ((retry >= MAX_RETRY) && (data != i_data)) + { + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_WRITE_VIA_SCAP + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT_DATA + * @userdata1[0:31] The data that we were attempting to write to + * SCAP_DATA. + * @userdata1[32:63] The data that was found in the register on the + * final attempt. + * @userdata2 NVDIMM Target HUID associated with this BPM + * @devdesc The command sent to the BPM failed. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_WRITE_VIA_SCAP, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT_DATA, + TWO_UINT32_TO_UINT64(i_data, + data), + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::disableWriteProtection() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::disableWriteProtection()"); + errlHndl_t errl = nullptr; + + do { + + // The following write sequence to the I2C_REG_PROTECT register + // indirectly removes write protection from registers 0x40-0x7F on + // page 4. + for ( size_t i = 0; i < BPM_PASSWORD_LENGTH; ++i) + { + errl = nvdimmWriteReg(iv_nvdimm, + I2C_REG_PROTECT, + BPM_PASSWORD[i]); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::disableWriteProtection(): " + "Failed to write the unlock sequence to " + "I2C_REG_PROTECT"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + } + if (errl != nullptr) + { + break; + } + + nanosleep(0, 100 * NS_PER_MSEC); + + // Make sure protection was removed + uint8_t data = 0; + errl = nvdimmReadReg(iv_nvdimm, + I2C_REG_PROTECT, + data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::disableWriteProtection(): " + "Failed to verify that write protection was removed"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + const uint8_t WRITE_PROTECT_DISABLED = 0x80; + if (!(data & WRITE_PROTECT_DISABLED)) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::disableWriteProtection(): " + "Failed to disable write protection. 
I2C_REG_PROTECT"); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_DISABLE_WRITE_PROTECTION + * @reasoncode BPM_RC::BPM_DISABLE_WRITE_PROTECTION_FAILED + * @userdata1 NVDIMM Target HUID associated with this BPM + * @devdesc Failed to disable write protection via I2C_REG_PROTECT. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_DISABLE_WRITE_PROTECTION, + BPM_RC::BPM_DISABLE_WRITE_PROTECTION_FAILED, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::switchBpmPage(uint16_t const i_segmentCode) +{ + + errlHndl_t errl = nullptr; + + do { + const uint8_t SPECIAL_CONTROL_COMMAND1 = 0x3E; + const uint8_t SPECIAL_CONTROL_COMMAND2 = 0x3F; + // Next, switch to the desired BPM segment by writing the segment code + // to the BPM's Special Control Command registers. + // + // Since the SCAP_DATA register can only hold 1 byte at a time we must + // do this in two steps. + // According to SMART, the segment code must be written in the following + // form to those registers: + // Register 0x3E gets LO(i_segmentCode) byte + // Register 0x3F gets HI(i_segmentCode) byte + // Example: 0x9D5E is the segment code for Segment D. It must be written + // as follows + // 0x3E, 0x5E + // 0x3F, 0x9D + const uint8_t loSegCode = i_segmentCode & 0xFF; + const uint8_t hiSegCode = (i_segmentCode >> 8) & 0xFF; + + TRACUCOMP(g_trac_bpm, "Bpm::switchBpmPage(): " + "Writing 0x%.2X to SPECIAL_CONTROL_COMMAND1 and " + "0x%.2X to SPECIAL_CONTROL_COMMAND2", + loSegCode, + hiSegCode); + + // First, clear the SPECIAL_CONTROL_COMMAND2 register so that we can + // write the full sequence without issue. 
+ errl = writeViaScapRegister(SPECIAL_CONTROL_COMMAND2, 0x00); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::switchBpmPage(): " + "Writing 0x%.2X to SPECIAL_CONTROL_COMMAND2 " + "FAILED. BPM page will not have switched properly!!", + hiSegCode); + break; + } + + // Write the LO segment code. + errl = writeViaScapRegister(SPECIAL_CONTROL_COMMAND1, loSegCode); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::switchBpmPage(): " + "Writing 0x%.2X to SPECIAL_CONTROL_COMMAND1 " + "FAILED. BPM page will not have switched properly!!", + loSegCode); + break; + } + + // Write the HI segment code. + errl = writeViaScapRegister(SPECIAL_CONTROL_COMMAND2, hiSegCode); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::switchBpmPage(): " + "Writing 0x%.2X to SPECIAL_CONTROL_COMMAND2 " + "FAILED. BPM page will not have switched properly!!", + hiSegCode); + break; + } + + + // Request to open segment page is sent. + // Wait a few seconds for the operation to complete. 
+ nanosleep(2,0); + + } while(0); + + return errl; +} + +errlHndl_t Bpm::writeToMagicRegisters( + uint8_t const (&i_magicValues)[NUM_MAGIC_REGISTERS]) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::writeToMagicRegisters() 0x%.2X 0x%.2X", + i_magicValues[0], + i_magicValues[1]); + errlHndl_t errl = nullptr; + + do { + const uint16_t magic_registers[NUM_MAGIC_REGISTERS] = + {BPM_MAGIC_REG1, BPM_MAGIC_REG2}; + + for (size_t i = 0; i < NUM_MAGIC_REGISTERS; ++i) + { + errl = nvdimmWriteReg(iv_nvdimm, + magic_registers[i], + i_magicValues[i]); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::writeToMagicRegisters(): " + "Failed to write the magic values to the magic " + "registers"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + } + if (errl != nullptr) + { + break; + } + + // Verify the magic values were written + uint8_t magic_data[NUM_MAGIC_REGISTERS] = {0}; + for (size_t i = 0; i < NUM_MAGIC_REGISTERS; ++i) + { + errl = nvdimmReadReg(iv_nvdimm, + magic_registers[i], + magic_data[i]); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::writeToMagicRegisters(): " + "Failed to read back magic values to verify that " + "they were written."); + errl->collectTrace(BPM_COMP_NAME); + break; + } + } + if (errl != nullptr) + { + break; + } + + // If either of the magic values stored in magic_data don't match the + // corresponding expected values in magic_values then an error occurred. 
+ if ( (magic_data[0] != i_magicValues[0]) + || (magic_data[1] != i_magicValues[1])) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::writeToMagicRegisters(): " + "Magic values read from BPM didn't match expected values " + "BPM_MAGIC_REG1 Expected 0x%.2X Actual 0x%.2X " + "BPM_MAGIC_REG2 Expected 0x%.2X Actual 0x%.2X", + i_magicValues[0], magic_data[0], + i_magicValues[1], magic_data[1]); + + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_WRITE_MAGIC_REG + * @reasoncode BPM_RC::BPM_WRITE_TO_MAGIC_REG_FAILED + * @userdata1[0:7] BPM_MAGIC_REG1 expected value + * @userdata1[8:15] BPM_MAGIC_REG1 actual value + * @userdata1[16:23] BPM_MAGIC_REG2 expected value + * @userdata1[24:31] BPM_MAGIC_REG2 actual value + * @userdata2[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc Failed to write values to the magic registers on + * the BPM. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_WRITE_MAGIC_REG, + BPM_RC::BPM_WRITE_TO_MAGIC_REG_FAILED, + TWO_UINT16_TO_UINT32( + TWO_UINT8_TO_UINT16(i_magicValues[0], + magic_data[0]), + TWO_UINT8_TO_UINT16(i_magicValues[1], + magic_data[1])), + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + TRACUCOMP(g_trac_bpm, "Bpm::writeToMagicRegisters(): " + "Magic values successfully written to BPM " + "BPM_MAGIC_REG1 0x%.2X " + "BPM_MAGIC_REG2 0x%.2X ", + magic_data[0], + magic_data[1]); + + } while(0); + + return errl; +} + +errlHndl_t Bpm::dumpSegment(uint16_t const i_segmentCode, + uint8_t (&o_buffer)[SEGMENT_SIZE]) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::dumpSegment(): Segment %X", + getSegmentIdentifier(i_segmentCode)); + assert(i_segmentCode == SEGMENT_B_CODE, "Bpm::dumpSegment(): Only Segment B is 
supported."); + + errlHndl_t errl = nullptr; + + do { + + errl = disableWriteProtection(); + if (errl != nullptr) + { + break; + } + + // We cannot be in BSL mode when dumping the config segments. Verify we + // aren't in BSL mode by checking SCAP_STATUS + scap_status_register_t status; + errl = nvdimmReadReg(iv_nvdimm, + SCAP_STATUS, + status.full); + if (errl != nullptr) + { + errl->collectTrace(BPM_COMP_NAME); + break; + } + + if (status.bit.Bpm_Bsl_Mode) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::dumpSegment(): " + "BSL Mode is enabled. Attempting to exit BSL mode."); + + // Try to exit BSL mode. This function will exit BSL. + errl = resetDevice(); + if (errl != nullptr) + { + break; + } + + // Exit update mode if on and write back production magic values. + errl = exitUpdateMode(); + if (errl != nullptr) + { + break; + } + + errl = nvdimmReadReg(iv_nvdimm, + SCAP_STATUS, + status.full); + if (errl != nullptr) + { + errl->collectTrace(BPM_COMP_NAME); + break; + } + if (status.bit.Bpm_Bsl_Mode) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::dumpSegment(): " + "Couldn't dump Segment %X. BSL Mode is enabled.", + getSegmentIdentifier(i_segmentCode)); + + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_DUMP_SEGMENT + * @reasoncode BPM_RC::BPM_BSL_MODE_ENABLED + * @userdata1[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc Couldn't dump segment data because BSL mode + * was enabled. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_DUMP_SEGMENT, + BPM_RC::BPM_BSL_MODE_ENABLED, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + + break; + } + } + + // First the NVDIMM MAGIC registers BPM_MAGIC_REG1 and BPM_MAGIC_REG2 + // must be programmed to 0xBA and 0xAB respectively. + const uint8_t magic_values[NUM_MAGIC_REGISTERS] = {0xBA, 0xAB}; + errl = writeToMagicRegisters(magic_values); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::dumpSegment(): " + "Failed to write magic numbers that enable " + "reading of segment data."); + break; + } + + uint8_t retry = 1; + // Attempt to switch to the correct page and dump data twice. + do { + // Set buffer to be all zeroes. + memset(&o_buffer, 0, SEGMENT_SIZE); + + // Open this segments page on the BPM. + errl = switchBpmPage(i_segmentCode); + if (errl != nullptr) + { + break; + } + + TRACFCOMP(g_trac_bpm, "Bpm::dumpSegment(): " + "Dumping Segment %X to buffer.", + getSegmentIdentifier(i_segmentCode)); + + // Dump the segment data + bool wrongPage = false; + for (uint8_t reg = 0; reg < SEGMENT_SIZE; ++reg) + { + errl = readViaScapRegister(reg, o_buffer[reg]); + if (errl != nullptr) + { + break; + } + + // We can determine if the page switch succeeded based on the + // first three bytes from regs 0x10-0x12. If Segment B was + // opened, then 0x10-0x1F is serial number for the BPM. 
+ // SMART guarantees the first three bytes to be as follows: + if ((reg == 0x10) && (o_buffer[reg] != 0x53)) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::dumpSegment " + "data 0x%.2X at offset 0x%.2x wasn't expected " + "value 0x53", + o_buffer[reg], reg); + wrongPage = true; + } + if ((reg == 0x11) && (o_buffer[reg] != 0x46)) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::dumpSegment " + "data 0x%.2X at offset 0x%.2x wasn't expected " + "value 0x46", + o_buffer[reg], reg); + wrongPage = true; + } + if ((reg == 0x12) && (o_buffer[reg] != 0x52)) + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::dumpSegment " + "data 0x%.2X at offset 0x%.2x wasn't expected " + "value 0x52", + o_buffer[reg], reg); + wrongPage = true; + } + + if (wrongPage && (reg == 0x20)) + { + break; + } + + } + + TRACUBIN(g_trac_bpm, "Segment BIN DUMP", o_buffer, SEGMENT_SIZE); + + if ((errl != nullptr) || (wrongPage == false)) + { + break; + } + + // Close this segments page on the BPM before making another + // attempt. + errl = switchBpmPage(DEFAULT_REG_PAGE); + if (errl != nullptr) + { + break; + } + + } while(++retry < MAX_RETRY); + if (errl != nullptr) + { + break; + } + if (retry >= MAX_RETRY) + { + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_DUMP_SEGMENT + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT + * @userdata1 The segment code for the page that failed to + * open. + * @userdata2 NVDIMM Target HUID associated with this BPM + * @devdesc Failed to open the segment page in the given + * amount of retries. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_DUMP_SEGMENT, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT, + i_segmentCode, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + setAttemptAnotherUpdate(); + break; + } + + } while(0); + + TRACFCOMP(g_trac_bpm, "Bpm::dumpSegment(): " + "Closing Segment %X's page.", + getSegmentIdentifier(i_segmentCode)); + + // Close the Segment page by switching back to the default page. + errlHndl_t closeSegmentErrl = switchBpmPage(DEFAULT_REG_PAGE); + if (closeSegmentErrl != nullptr) + { + handleMultipleErrors(errl, closeSegmentErrl); + } + + // Write back the production magic values. + errlHndl_t magicErrl = writeToMagicRegisters(PRODUCTION_MAGIC_VALUES); + if (magicErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::dumpSegment(): " + "Failed to write update mode magic numbers."); + handleMultipleErrors(errl, magicErrl); + } + + return errl; +} + +errlHndl_t Bpm::mergeSegment(BpmConfigLidImage const i_configImage, + uint16_t const i_segmentCode, + uint8_t (&o_buffer)[SEGMENT_SIZE]) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::mergeSegment(): Segment %X", + getSegmentIdentifier(i_segmentCode)); + errlHndl_t errl = nullptr; + + size_t segmentStartOffset = 0; + auto it = segmentMap.find(i_segmentCode); + if (it != segmentMap.end()) + { + segmentStartOffset = it->second; + } + else + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::mergeSegment(): " + "Couldn't find start offset for Segment %X", + getSegmentIdentifier(i_segmentCode)); + assert(false, "Add the missing Segment %X Start Offset to the offset map", getSegmentIdentifier(i_segmentCode)); + } + + TRACFCOMP(g_trac_bpm, "Bpm::mergeSegment(): " + "Segment %X Start offset: 0x%X", + getSegmentIdentifier(i_segmentCode), + segmentStartOffset); + + do { + + const size_t NUMBER_OF_FRAGMENTS 
= i_configImage.getNumberOfFragments(); + char const * data = reinterpret_cast<char const *>( + i_configImage.getFirstFragment()); + + config_image_fragment_t const * fragment = + reinterpret_cast<config_image_fragment_t const *>(data); + + TRACUCOMP(g_trac_bpm, "mergeSegment(): " + "NUMBER_OF_FRAGMENTS = 0x%.4X", NUMBER_OF_FRAGMENTS); + + for(size_t i = 0; i < NUMBER_OF_FRAGMENTS; ++i) + { + // The fragment offsets are given as offsets within the + // configuration segment data. So, if the fragment offset is less + // than the starting offset of this segment then the fragment is not + // relevant to this segment. + if (fragment->iv_offset < segmentStartOffset) + { + // This fragment is not for the segment we are dealing with. + TRACUCOMP(g_trac_bpm, "mergeSegment(): " + "Fragment with offset 0x%.4X not related to " + "Segment %X, skipping", + fragment->iv_offset, + getSegmentIdentifier(i_segmentCode)); + + // Move to the next fragment + data += sizeof(config_image_fragment_t) + + fragment->iv_fragmentSize; + fragment = + reinterpret_cast<config_image_fragment_t const *>(data); + continue; + } + // Each segment is 128 bytes in size. So, if the offset given for + // the fragment is greater than the upper boundary then no more + // fragments exist for this segment. + if (fragment->iv_offset >= segmentStartOffset + SEGMENT_SIZE) + { + // This fragment and all other fragments afterward are not for + // this segment. + TRACUCOMP(g_trac_bpm, "mergeSegment(): " + "Fragment with offset 0x%.4X greater than/equal to " + "Segment %X ending offset, skipping", + fragment->iv_offset, + getSegmentIdentifier(i_segmentCode)); + break; + } + + // The fragment offset may be out of bounds for the buffer so + // scale it down to be within the buffer size. + size_t offset = fragment->iv_offset % SEGMENT_SIZE; + + // Overwrite the BPM segment data at the offset specified by the + // fragment. 
+ memcpy(&o_buffer[offset], + &(fragment->iv_data), + fragment->iv_fragmentSize); + + // Move to the next fragment + data += sizeof(config_image_fragment_t) + fragment->iv_fragmentSize; + fragment = reinterpret_cast<config_image_fragment_t const *>(data); + } + + TRACUBIN(g_trac_bpm, "Merged Segment", o_buffer, SEGMENT_SIZE); + + } while(0); + + return errl; +} + +errlHndl_t Bpm::eraseSegment(uint16_t i_segmentCode) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::eraseSegment(): Segment %X", + getSegmentIdentifier(i_segmentCode)); + errlHndl_t errl = nullptr; + + do { + + payload_t payload; + + size_t segmentStartOffset = 0; + auto it = segmentMap.find(i_segmentCode); + if (it != segmentMap.end()) + { + segmentStartOffset = it->second; + } + else + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::mergeSegment(): " + "Couldn't find start offset for Segment %X", + getSegmentIdentifier(i_segmentCode)); + assert(false, "Add the missing Segment %X Start Offset to the offset map", getSegmentIdentifier(i_segmentCode)); + } + errl = setupPayload(payload, + BSL_ERASE_SEGMENT, + BPM_CONFIG_START_ADDRESS + segmentStartOffset); + if (errl != nullptr) + { + break; + } + + errl = issueCommand(BPM_PASSTHROUGH, + payload, + WRITE, + ERASE_SEGMENT_DELAY); + if (errl != nullptr) + { + break; + } + + // Wait 1 second for the operation to complete. + TRACFCOMP(g_trac_bpm, "Bpm::eraseSegment(): " + "Erasing Segment %X. " + "Waiting 1 second for operation to complete.", + getSegmentIdentifier(i_segmentCode)); + nanosleep(1,0); + + } while(0); + + TRACFCOMP(g_trac_bpm, EXIT_MRK"Bpm::eraseSegment(): " + "Segment %X erase operation completed " + "%s", + getSegmentIdentifier(i_segmentCode), + (errl == nullptr) ? 
"without errors" : "with errors"); + + return errl; +} + +errlHndl_t Bpm::writeSegment(uint8_t const (&i_buffer)[SEGMENT_SIZE], + uint16_t const i_segmentCode) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::writeSegment(): Segment %X", + getSegmentIdentifier(i_segmentCode)); + errlHndl_t errl = nullptr; + + do { + + auto it = segmentMap.find(i_segmentCode); + size_t segmentStartOffset = 0; + if (it != segmentMap.end()) + { + segmentStartOffset = it->second; + } + + // To update the given segment, we have to send over the data as + // payloads. Since the max size of a payload's data is 16 bytes, there + // will be 8 payloads sent to update a given segment because each + // segment is 128 bytes. + for (size_t offset = 0; + offset < SEGMENT_SIZE; + offset += MAX_PAYLOAD_DATA_SIZE) + { + // Construct a payload for the data at this offset up to the + // MAX_PAYLOAD_DATA_SIZE. + payload_t payload; + // Each segment is 128 bytes and the segment start addresses + // are their relative position to BPM_CONFIG_START_ADDRESS. To + // arrive at the correct address offset for this data we must + // calculate the addressOffset in the following way. + uint16_t addressOffset = BPM_CONFIG_START_ADDRESS + + segmentStartOffset + + offset; + errl = setupPayload(payload, + BSL_RX_DATA_BLOCK, + addressOffset, + &i_buffer[offset], + MAX_PAYLOAD_DATA_SIZE); + if (errl != nullptr) + { + break; + } + + if (addressOffset % 0x20 == 0) + { + TRACFCOMP(g_trac_bpm, "Bpm::writeSegment(): " + "Writing to address offset 0x%.4X. " + "Config bytes written: 0x%X; Remaining: 0x%X", + addressOffset, + offset, (SEGMENT_SIZE - offset)); + } + + // Attempt to write the payload using a retry loop. + errl = blockWrite(payload); + if (errl != nullptr) + { + break; + } + } + if (errl != nullptr) + { + break; + } + + } while(0); + + TRACFCOMP(g_trac_bpm, EXIT_MRK"Bpm::writeSegment(): " + "Segment %X write and verification completed " + "%s", + getSegmentIdentifier(i_segmentCode), + (errl == nullptr) ? 
"without errors" : "with errors"); + + return errl; +} + +errlHndl_t Bpm::preprocessSegments(BpmConfigLidImage const i_configImage) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::preprocessSegments()"); + errlHndl_t errl = nullptr; + + do { + + if (iv_attemptAnotherUpdate && iv_segmentDMerged && iv_segmentBMerged) + { + // The segment data has already been merged with the flash image + // data. Doing it again has the potential to fail depending on where + // the last update attempt failed. + TRACFCOMP(g_trac_bpm, "Bpm::preprocessSegments(): " + "Segment data was merged in a previous update attempt, " + "skipping preprocessing and using existing data."); + break; + } + + // Merge the fragments for D with the data from the BPM. For D, this + // will just populate the empty segment with the data from the flash + // image. + if (!iv_segmentDMerged) + { + errl = mergeSegment(i_configImage, SEGMENT_D_CODE, iv_segmentD); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::preprocessSegments(): " + "Failed to merge Segment D."); + break; + } + iv_segmentDMerged = true; + } + else + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::preprocessSegments(): " + "Segment %X has been merged already. Skipping merge...", + getSegmentIdentifier(SEGMENT_D_CODE)); + } + + // Merge the fragments for B with the data from the BPM. + if (!iv_segmentBMerged) + { + // Dump the segment into a buffer. This is only necessary for + // segment B as segment D comes straight from the flash image file. + errl = dumpSegment(SEGMENT_B_CODE, iv_segmentB); + if (errl != nullptr) + { + break; + } + + errl = mergeSegment(i_configImage, SEGMENT_B_CODE, iv_segmentB); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::preprocessSegments(): " + "Failed to merge Segment B."); + break; + } + iv_segmentBMerged = true; + } + else + { + TRACFCOMP(g_trac_bpm, INFO_MRK"Bpm::preprocessSegments(): " + "Segment %X has been merged already. 
Skipping merge...", + getSegmentIdentifier(SEGMENT_B_CODE)); + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::getResponse(uint8_t * const o_responseData, + uint8_t const i_responseSize) +{ + TRACUCOMP(g_trac_bpm, ENTER_MRK"Bpm::getResponse()"); + + errlHndl_t errl = nullptr; + memset(o_responseData, 0xFF, i_responseSize); + + do { + + // Get the result from the BPM. + // First clear the error status register + errl = nvdimmWriteReg(iv_nvdimm, + BPM_REG_ERR_STATUS, + 0x00); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::getResponse(): " + "Failed to clear error status register"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Set the payload length + // The 4 header bytes plus 2 CRC bytes make up the other data size in + // the response payload. + const uint8_t RESPONSE_PAYLOAD_OTHER_DATA_SIZE = 6; + uint8_t responsePayloadSize = RESPONSE_PAYLOAD_OTHER_DATA_SIZE + + i_responseSize; + + errl = nvdimmWriteReg(iv_nvdimm, + BPM_PAYLOAD_LENGTH, + responsePayloadSize); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::getResponse(): " + "Failed to set payload length"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Setup the command status register + command_status_register_t commandStatus; + commandStatus.bits.Bsp_Cmd_In_Progress = 1; + commandStatus.bits.Operator_Type = READ; + errl = nvdimmWriteReg(iv_nvdimm, + BPM_CMD_STATUS, + commandStatus.value); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::getResponse(): " + "Failed to setup command status register"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Setup command type. + errl = nvdimmWriteReg(iv_nvdimm, + BPM_REG_CMD, + BPM_PASSTHROUGH); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::getResponse(): " + "Failed to setup command type."); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + errl = waitForCommandStatusBitReset(commandStatus); + if (errl != nullptr) + { + break; + } + + // Read out the response payload. 
+ payload_t responsePayload; + + for (size_t i = 0; i < responsePayloadSize; ++i) + { + uint8_t data = 0; + errl = nvdimmReadReg(iv_nvdimm, + (BPM_REG_PAYLOAD_START + (i * sizeof(uint8_t))), + data); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::getResponse(): " + "Failed to read response payload"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + responsePayload.push_back(data); + } + if (errl != nullptr) + { + break; + } + + // Verify the data from the response was good. + uint8_t* responseIterator = responsePayload.data(); + uint16_t responseCrc = *(reinterpret_cast<uint16_t *> + (&responseIterator[PAYLOAD_HEADER_SIZE + i_responseSize])); + // The BPM is going to give the response CRC in LE. So convert it to BE. + responseCrc = le16toh(responseCrc); + uint16_t expectedCrc = crc16_calc(responseIterator, + PAYLOAD_HEADER_SIZE + i_responseSize); + if (responseCrc != expectedCrc) + { + memset(o_responseData, 0xFF, i_responseSize); + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::getResponse(): " + "Response CRC verification failed. " + "Received invalid data from BPM."); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_GET_RESPONSE + * @reasoncode BPM_RC::BPM_RESPONSE_CRC_MISMATCH + * @userdata1[00:31] Expected Response CRC (in Big Endian) + * @userdata1[32:63] Actual Response CRC (in Big Endian) + * @userdata2 NVDIMM Target HUID associated with this BPM + * @devdesc The response CRC calculated by the BPM didn't + * match the CRC calculated by hostboot. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_GET_RESPONSE, + BPM_RC::BPM_RESPONSE_CRC_MISMATCH, + TWO_UINT32_TO_UINT64(expectedCrc, + responseCrc), + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + // Write the data to the output buffer + for (size_t i = 0; i < i_responseSize; ++i) + { + // Only copy the response data from the payload to the output buffer + o_responseData[i] = responsePayload[i + PAYLOAD_HEADER_SIZE]; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::verifyBlockWrite(payload_t i_payload, + uint8_t i_dataLength, + uint8_t & o_status) +{ + errlHndl_t errl = nullptr; + // Assume a bad status. + o_status = 0xFF; + + do { + + // Pull the address to verify out of the payload. It was inserted in + // little endian form so it needs to be converted back to big endian + // because setupPayload expects an address in big endian. + uint16_t address = getPayloadAddressBE(i_payload); + + // The data section of the payload is organized in the following way: + // 2 bytes: uint16_t size of data to verify in little endian format + // 2 bytes: CRC of the data to be verified on the BPM in little endian. + const size_t VERIFY_BLOCK_PAYLOAD_DATA_SIZE = 4; + uint8_t data[VERIFY_BLOCK_PAYLOAD_DATA_SIZE] = {0}; + + // Since the data length is stored as uint16_t but the length we deal + // with is uint8_t we can easily convert this to little endian by + // storing our uint8_t data length in the first index of the array and + // leaving the next index 0. + data[0] = i_dataLength; + + // Calculate the uint16_t CRC for the data that was written to the BPM. + // The BPM will compare its calculated CRC with this one to verify if + // the block was written correctly. 
+ uint16_t crc = htole16(crc16_calc(&i_payload[PAYLOAD_DATA_START_INDEX], + i_dataLength)); + + memcpy(&data[2], &crc, sizeof(uint16_t)); + + payload_t verifyPayload; + errl = setupPayload(verifyPayload, + BSL_VERIFY_BLOCK, + address, + data, + VERIFY_BLOCK_PAYLOAD_DATA_SIZE); + if (errl != nullptr) + { + break; + } + + // Issue the command to the BPM. + errl = issueCommand(BPM_PASSTHROUGH, + verifyPayload, + WRITE, + NO_DELAY_EXTERNAL_RESPONSE); + if (errl != nullptr) + { + break; + } + + errl = getResponse(&o_status, sizeof(uint8_t)); + if (errl != nullptr) + { + break; + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::blockWrite(payload_t i_payload) +{ + assert(i_payload[PAYLOAD_COMMAND_INDEX] == BSL_RX_DATA_BLOCK, + "Bpm::blockWrite(): " + "Can only write BSL_RX_DATA_BLOCK commands"); + + errlHndl_t errl = nullptr; + uint8_t retry = 0; + + // Get the payload address for trace output. + uint16_t payloadAddress = getPayloadAddressBE(i_payload); + + // Any status from verifyBlockWrite that is non-zero is considered a + // fail. So, assume a fail and check. + uint8_t wasVerified = 0xFF; + do { + + + // Since the write command has its response packet checked within the + // issueCommand() function we must attempt to retry the write if we get + // a bad response from the BPM. + errl = blockWriteRetry(i_payload); + if (errl != nullptr) + { + break; + } + + // Sleep for 0.001 second + nanosleep(0, 1 * NS_PER_MSEC); + + uint8_t dataLength = i_payload[PAYLOAD_HEADER_DATA_LENGTH_INDEX] + - PAYLOAD_HEADER_SIZE; + errl = verifyBlockWrite(i_payload, + dataLength, + wasVerified); + if ( (errl != nullptr) + && (errl->reasonCode() == BPM_RC::BPM_RESPONSE_CRC_MISMATCH) + && ((retry + 1) < MAX_RETRY)) + { + // Delete the retryable error and continue + TRACFCOMP(g_trac_bpm, "Bpm::blockWrite(): " + "Encountered a retryable error. 
Delete and continue."); + delete errl; + errl = nullptr; + } + else if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::blockWrite(): " + "BSL_VERIFY_BLOCK failed for address 0x%.4X. " + "A non-retryable error occurred on attempt %d/%d", + payloadAddress, + (retry + 1), + MAX_RETRY); + // A non-retryable error occurred. Break from retry loop. + break; + } + + if (wasVerified != 0) + { + TRACUCOMP(g_trac_bpm, "Bpm::blockWrite(): " + "BSL_VERIFY_BLOCK failed for address 0x%.4X. " + "Attempt %d/%d", + payloadAddress, + (retry + 1), + MAX_RETRY); + } + else + { + // Write verified successfully, stop retries. + break; + } + + } while (++retry < MAX_RETRY); + if ((errl == nullptr) && (retry >= MAX_RETRY) && (wasVerified != 0)) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::blockWrite(): " + "Failed to write payload data to BPM after %d retries.", + MAX_RETRY); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_BLOCK_WRITE + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT + * @userdata1[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc The block of data to be written to the BPM + * failed to write successfully in the given number + * of retries. + * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_BLOCK_WRITE, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddVendorLog(iv_nvdimm, errl); + + } + + if (errl != nullptr) + { + // Change the state of iv_attemptAnotherUpdate. This will signal + // another update attempt or cease further attempts. 
+ setAttemptAnotherUpdate(); + } + + return errl; +} + +errlHndl_t Bpm::blockWriteRetry(payload_t i_payload) +{ + assert(i_payload[PAYLOAD_COMMAND_INDEX] == BSL_RX_DATA_BLOCK, + "Bpm::blockWriteRetry(): " + "Can only retry BSL_RX_DATA_BLOCK commands"); + + errlHndl_t errl = nullptr; + uint8_t retry = 0; + + // Get the payload address for trace output. + uint16_t payloadAddress = getPayloadAddressBE(i_payload); + + do { + + // Send the payload data over as a pass-through command. The response + // will be checked internally. + errl = issueCommand(BPM_PASSTHROUGH, i_payload, WRITE); + if (errl == nullptr) + { + // Command was a success. Stop retries. + break; + } + + if ( (errl != nullptr) + && (errl->reasonCode() == BPM_RC::BPM_RESPONSE_CRC_MISMATCH) + && ((retry + 1) < MAX_RETRY)) + { + // Delete the retryable error and continue + TRACFCOMP(g_trac_bpm, "Bpm::blockWriteRetry(): " + "Encountered a retryable error. Delete and continue."); + delete errl; + errl = nullptr; + } + else if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::blockWriteRetry(): " + "BSL_RX_DATA_BLOCK failed for address 0x%.4X. " + "A non-retryable error occurred on attempt %d/%d", + payloadAddress, + (retry + 1), + MAX_RETRY); + // A non-retryable error occurred. Break from retry loop. + break; + } + + } while (++retry < MAX_RETRY); + if ((errl == nullptr) && (retry >= MAX_RETRY)) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::blockWriteRetry(): " + "Failed to write payload data to BPM after %d retries.", + MAX_RETRY); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_RETRY_BLOCK_WRITE + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT + * @userdata1[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc The block of data to be written to the BPM + * failed to write successfully in the given number + * of retries. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_RETRY_BLOCK_WRITE, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + + } + + if (errl != nullptr) + { + // Change the state of iv_attemptAnotherUpdate. This will signal + // another update attempt or cease further attempts. + setAttemptAnotherUpdate(); + } + + return errl; +} + +errlHndl_t Bpm::waitForCommandStatusBitReset( + command_status_register_t i_commandStatus) +{ + errlHndl_t errl = nullptr; + + do { + // Wait until the COMMAND_IN_PROGRESS bit is reset + errl = nvdimmReadReg(iv_nvdimm, + BPM_CMD_STATUS, + i_commandStatus.value); + if (errl != nullptr) + { + errl->collectTrace(BPM_COMP_NAME); + break; + } + + // Give the BPM 20 seconds to complete any given command before we time + // out and cancel the update procedure. + int retry = 20 * MS_PER_SEC; + + while (i_commandStatus.bits.Bsp_Cmd_In_Progress) + { + nanosleep(0, 1 * NS_PER_MSEC); + errl = nvdimmReadReg(iv_nvdimm, + BPM_CMD_STATUS, + i_commandStatus.value); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::waitForCommandStatusBitReset(): " + "Failed to read BPM_CMD_STATUS register"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + if (--retry <= 0) + { + TRACFCOMP(g_trac_bpm, ERR_MRK + "BPM::waitForCommandStatusBitReset(): " + "BSP_CMD_IN_PROGRESS bit has not reset in allotted " + "number of retries. Cancel update procedure"); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_WAIT_FOR_CMD_BIT_RESET + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT + * @userdata1[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc The command status bit failed to reset in + * the given number of retries. 
+ * @custdesc A problem occurred during IPL of the system. + */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_WAIT_FOR_CMD_BIT_RESET, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT, + TARGETING::get_huid(iv_nvdimm)); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + } + if (errl != nullptr) + { + break; + } + + // Check for error + if (i_commandStatus.bits.Error_Flag) + { + uint8_t error = 0; + errl = nvdimmReadReg(iv_nvdimm, + BPM_REG_ERR_STATUS, + error); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::waitForCommandStatusBitReset(): " + "Failed to read BPM_REG_ERR_STATUS"); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::waitForCommandStatusBitReset(): " + "BPM_CMD_STATUS Error Flag is set"); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_WAIT_FOR_CMD_BIT_RESET + * @reasoncode BPM_RC::BPM_CMD_STATUS_ERROR_BIT_SET + * @userdata1[0:7] Error status code returned by BPM + * @userdata2[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc The command status register returned an error. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_WAIT_FOR_CMD_BIT_RESET, + BPM_RC::BPM_CMD_STATUS_ERROR_BIT_SET, + error, + TARGETING::get_huid(iv_nvdimm)); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + + } + + } while(0); + + return errl; +} + +errlHndl_t Bpm::verifyGoodBpmState() +{ + errlHndl_t errl = nullptr; + int retry = 100; + scap_status_register_t status; + const uint8_t BPM_PRESENT_AND_ENABLED = 0x11; + + while (retry > 0) + { + + errl = nvdimmReadReg(iv_nvdimm, + SCAP_STATUS, + status.full); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::verifyGoodBpmState(): " + "Failed to read SCAP_STATUS to determine " + "state of BPM."); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + if ((status.full & 0xFF) == BPM_PRESENT_AND_ENABLED) + { + // BPM is present and enabled. Stop retries. + break; + } + + --retry; + nanosleep(0, 1 * NS_PER_MSEC); + } + if (retry <= 0) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::verifyGoodBpmState(): " + "BPM failed to become present and enabled " + "in 100 retries."); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_VERIFY_GOOD_BPM_STATE + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT + * @userdata1 NVDIMM Target HUID associated with this BPM + * @userdata2 SCAP_STATUS register contents. See nvdimm.H + * for bits associated with this register. + * @devdesc The BPM did not become present and enabled + * in given number of retries. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_VERIFY_GOOD_BPM_STATE, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT, + TARGETING::get_huid(iv_nvdimm), + status.full); + errl->collectTrace(BPM_COMP_NAME); + errl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + } + + return errl; +} + +errlHndl_t Bpm::waitForBusyBit() +{ + errlHndl_t errl = nullptr; + int retry = 10; + scap_status_register_t status; + + while (retry > 0) + { + + errl = nvdimmReadReg(iv_nvdimm, + SCAP_STATUS, + status.full); + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, "Bpm::waitForBusyBit(): " + "Failed to read from SCAP_STATUS to determine " + "state of Busy bit."); + errl->collectTrace(BPM_COMP_NAME); + break; + } + + if (!status.bit.Busy) + { + // SCAP Register is no longer busy. Stop retries. + break; + } + + if (retry <= 0) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::waitForBusyBit(): " + "SCAP_STATUS Busy bit failed to reset to 0 " + "in 10 retries."); + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_WAIT_FOR_BUSY_BIT_RESET + * @reasoncode BPM_RC::BPM_EXCEEDED_RETRY_LIMIT + * @userdata1[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc The SCAP status register busy bit failed to + * reset in given number of retries. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_WAIT_FOR_BUSY_BIT_RESET, + BPM_RC::BPM_EXCEEDED_RETRY_LIMIT, + TARGETING::get_huid(iv_nvdimm)); + errl->collectTrace(BPM_COMP_NAME); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + --retry; + nanosleep(0, 2 * NS_PER_MSEC); + } + + return errl; +} + +errlHndl_t Bpm::runConfigUpdates(BpmConfigLidImage i_configImage) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::runConfigUpdates()"); + errlHndl_t errl = nullptr; + + do { + + // Before the entering BSL mode, we must do preprocessing prior to the + // config part of the update. Segment B needs to be dumped from the + // BPM into a buffer and then the config data from the image needs to be + // inserted into it. To dump segment data, it is required to have + // working firmware which will not be the case during BSL mode. + errl = preprocessSegments(i_configImage); + if (errl != nullptr) + { + break; + } + + // Enter Update mode + errl = enterUpdateMode(); + if (errl != nullptr) + { + break; + } + + // Verify in Update mode + errl = inUpdateMode(); + if (errl != nullptr) + { + break; + } + + // Enter Bootstrap Loader (BSL) mode to perform firmware update + errl = enterBootstrapLoaderMode(); + if (errl != nullptr) + { + break; + } + + // Unlock the device. This is a BSL command so we must already be in + // BSL mode to execute it. + errl = unlockDevice(); + if (errl != nullptr) + { + break; + } + + // Perform the configuration data segment updates. + // As of BSL 1.4 this is done via the BSL interface instead of SCAP + // registers. + errl = updateConfig(); + if (errl != nullptr) + { + // We are returning with an error. Since the error is from the + // config part of the updates it's best to erase the firmware on the + // BPM so that updates will be attempted on it in the future. 
+ // Because there isn't a way to determine the validity of the config + // section on the BPM we're completely reliant on what the firmware + // version reports to decide if we need to update or not. If we see + // that the firmware version matches the image but for some reason + // the config data wasn't updated properly we could believe we + // updated successfully when, in fact, we just left the BPM in a bad + // state. + if ( (iv_firmwareStartAddress == MAIN_PROGRAM_ADDRESS) + || (iv_firmwareStartAddress == MAIN_PROGRAM_ADDRESS_ALT)) + { + payload_t payload; + errlHndl_t fwEraseErrl = setupPayload(payload, + BSL_MASS_ERASE, + iv_firmwareStartAddress); + if (fwEraseErrl != nullptr) + { + handleMultipleErrors(errl, fwEraseErrl); + break; + } + + fwEraseErrl = issueCommand(BPM_PASSTHROUGH, + payload, + WRITE, + ERASE_FIRMWARE_DELAY); + if (fwEraseErrl != nullptr) + { + handleMultipleErrors(errl, fwEraseErrl); + break; + } + + TRACFCOMP(g_trac_bpm, "Bpm::updateFirmware(): " + "Performing BSL_MASS_ERASE on BPM to force full " + "update on any subsequent attempt. Sleep for 5 " + "seconds."); + longSleep(5); + } + break; + } + + } while(0); + + // Reset the device. This will exit BSL mode. 
+ errlHndl_t exitErrl = resetDevice(); + if (exitErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runConfigUpdates(): " + "Failed to reset the device"); + handleMultipleErrors(errl, exitErrl); + } + + // Exit update mode + exitErrl = exitUpdateMode(); + if (exitErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runConfigUpdates(): " + "Failed to exit update mode"); + handleMultipleErrors(errl, exitErrl); + } + + + return errl; +} + +errlHndl_t Bpm::runFirmwareUpdates(BpmFirmwareLidImage i_image) +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::runFirmwareUpdates()"); + errlHndl_t errl = nullptr; + + do { + + // Enter Update mode + errl = enterUpdateMode(); + if (errl != nullptr) + { + break; + } + + // Verify in Update mode + errl = inUpdateMode(); + if (errl != nullptr) + { + break; + } + + // Enter Bootstrap Loader (BSL) mode to perform firmware update + errl = enterBootstrapLoaderMode(); + if (errl != nullptr) + { + break; + } + + // Unlock the device. This is a BSL command so we must already be in + // BSL mode to execute it. + errl = unlockDevice(); + if (errl != nullptr) + { + break; + } + + // Run Firmware Update + errl = updateFirmware(i_image); + if (errl != nullptr) + { + break; + } + + TRACFCOMP(g_trac_bpm, "Bpm::runFirmwareUpdates(): " + "Perform final CRC check on entire BPM flash to load " + "new firmware."); + + errl = checkFirmwareCrc(); + if (errl != nullptr) + { + setAttemptAnotherUpdate(); + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm:: runFirmwareUpdates(): " + "Final CRC check failed. %s ", + (iv_attemptAnotherUpdate == false) ? + "Attempt another update..." + : "Attempts to update the BPM have failed. Firmware will not load."); + break; + } + + } while(0); + + // Reset the device. This will exit BSL mode. 
+ errlHndl_t exitErrl = resetDevice(); + if (exitErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runFirmwareUpdates(): " + "Failed to reset the device"); + handleMultipleErrors(errl, exitErrl); + } + + // Exit update mode + exitErrl = exitUpdateMode(); + if (exitErrl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::runFirmwareUpdates(): " + "Failed to exit update mode"); + handleMultipleErrors(errl, exitErrl); + } + + return errl; +} + +errlHndl_t Bpm::checkFirmwareCrc() +{ + TRACFCOMP(g_trac_bpm, ENTER_MRK"Bpm::checkFirmwareCrc()"); + errlHndl_t errl = nullptr; + + // The COMMAND_CRC_CHECK would return a 3 byte response in the following + // format: + // + // ======================================================================== + // [Status Code] [Computed_CRC_Lo] [Computed_CRC_Hi] + // ======================================================================== + // BSL_LOCKED 0x00 0x00 + // PARAMETER_ERROR 0x00 0x00 + // MAIN_FW_NOT_SUPPORT_CRC_CHECK 0x00 0x00 + // MEMORY_WRITE_CHECK_FAILED CRC_Low CRC_Hi + // WRITE_FORBIDDEN CRC_Low CRC_Hi + // VERIFY_MISMATCH CRC_Low CRC_Hi + // SUCCESSFUL_OPERATION CRC_Low CRC_Hi + // + // For status codes BSL_LOCKED, PARAMETER_ERROR, and + // MAIN_FW_NOT_SUPPORT_CRC_CHECK the response CRC values are considered + // as DONT CARE. + // + // For the remainder of the status codes the CRC values are the + // computed CRC of the image. + // + // For SUCCESSFUL_OPERATION, the RESET_VECTOR was written. 
+ // See bpm_update.H for more info on the status codes + const uint8_t CRC_CHECK_RESPONSE_SIZE = 3; + uint8_t responseData[CRC_CHECK_RESPONSE_SIZE] = {0}; + + do { + + TRACFCOMP(g_trac_bpm, "Bpm::checkFirmwareCrc(): " + "Performing final CRC check."); + payload_t crcPayload; + errl = setupPayload(crcPayload, + BSL_CRC_CHECK, + iv_firmwareStartAddress); + if (errl != nullptr) + { + break; + } + + errl = issueCommand(BPM_PASSTHROUGH, + crcPayload, + WRITE, + NO_DELAY_EXTERNAL_RESPONSE); + if (errl != nullptr) + { + break; + } + + // Wait 10 seconds for the CRC check to complete. + TRACFCOMP(g_trac_bpm, "Bpm::checkFirmwareCrc(): " + "Allow CRC check to complete on BPM by waiting 10 seconds."); + longSleep(10); + + errl = getResponse(responseData, CRC_CHECK_RESPONSE_SIZE); + if (errl != nullptr) + { + break; + } + + TRACFCOMP(g_trac_bpm, "Bpm::checkFirmwareCrc(): " + "Response Packet CRC check status = 0x%X, CRC_Low = 0x%X, " + "CRC_Hi = 0x%X", + responseData[0], + responseData[1], + responseData[2]); + + if (responseData[0] != SUCCESSFUL_OPERATION) + { + /*@ + * @errortype + * @severity ERRORLOG::ERRL_SEV_PREDICTIVE + * @moduleid BPM_RC::BPM_CHECK_FIRMWARE_CRC + * @reasoncode BPM_RC::BPM_FIRMWARE_CRC_VERIFY_FAILURE + * @userdata1[0:7] CRC check response status code. See bpm_update.H + * @userdata1[8:15] CRC low byte + * @userdata1[16:23] CRC high byte + * @userdata2[0:63] NVDIMM Target HUID associated with this BPM + * @devdesc The firmware CRC check failed. Cross check the + * CRC check response status code for more details. + * @custdesc A problem occurred during IPL of the system. 
+ */ + errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE, + BPM_RC::BPM_CHECK_FIRMWARE_CRC, + BPM_RC::BPM_FIRMWARE_CRC_VERIFY_FAILURE, + FOUR_UINT8_TO_UINT32(responseData[0], + responseData[1], + responseData[2], + 0), + TARGETING::get_huid(iv_nvdimm)); + nvdimmAddPage4Regs(iv_nvdimm,errl); + nvdimmAddVendorLog(iv_nvdimm, errl); + break; + } + + } while(0); + + if (errl != nullptr) + { + TRACFCOMP(g_trac_bpm, ERR_MRK"Bpm::checkFirmwareCrc(): " + "Error occurred during BPM Firmware CRC check. " + "Firmware image will not load on BPM and update must be " + "attempted again."); + errl->collectTrace(BPM_COMP_NAME); + } + + return errl; +} + +/** + * @brief Helper function to handle two potential errors that might occur in a + * function that only returns a single error log. If the return error is + * not nullptr then the second error will be linked to it and committed + * if this is the final update attempt. Otherwise, it will be deleted + * since the update procedure will occur again and may be successful. + * If the return error is nullptr then the return error will point to + * the second's error and the second error will point to nullptr. + * + * @param[in/out] io_returnErrl A pointer to the error that would be + * returned by the function that called + * this one. If nullptr, then it will be + * set point to the secondary error and + * that error will become nullptr. + * + * @param[in/out] io_secondErrl The secondary error that occurred which + * in addition to the usual returned error. 
+ */ +void Bpm::handleMultipleErrors(errlHndl_t& io_returnErrl, + errlHndl_t& io_secondErrl) +{ + if (iv_updateAttempted && (io_returnErrl != nullptr)) + { + io_secondErrl->plid(io_returnErrl->plid()); + TRACFCOMP(g_trac_bpm, "Committing second error eid=0x%X with plid of " + "returned error: 0x%X", + io_secondErrl->eid(), + io_returnErrl->plid()); + io_secondErrl->collectTrace(BPM_COMP_NAME); + io_secondErrl->addPartCallout(iv_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + ERRORLOG::errlCommit(io_secondErrl, BPM_COMP_ID); + } + else if (io_returnErrl == nullptr) + { + io_returnErrl = io_secondErrl; + io_secondErrl = nullptr; + } + else + { + // Another update attempt will be made, delete this secondary error. + delete io_secondErrl; + io_secondErrl = nullptr; + } +} + +uint16_t Bpm::crc16_calc(const void* i_ptr, int i_size) +{ + uint16_t crc = 0xFFFF; + const uint8_t* data = reinterpret_cast<const uint8_t*>(i_ptr); + + while (--i_size >= 0) + { + crc = crc ^ *(data++) << 8; + for (size_t i = 0; i < 8; ++i) + { + if (crc & 0x8000) + { + crc = crc << 1 ^ 0x1021; + } + else + { + crc = crc << 1; + } + } + } + + return (crc & 0xFFFF); +} + +}; // End of BPM namespace +}; // End of NVDIMM namespace diff --git a/src/usr/isteps/nvdimm/bpm_update.H b/src/usr/isteps/nvdimm/bpm_update.H new file mode 100644 index 000000000..4886a8abd --- /dev/null +++ b/src/usr/isteps/nvdimm/bpm_update.H @@ -0,0 +1,1078 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/isteps/nvdimm/bpm_update.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef BPM_UPDATE_H +#define BPM_UPDATE_H + +/* @file bpm_update.H + * + * + */ + +#include <vector> +#include <errl/errlentry.H> +#include <targeting/common/util.H> + +namespace NVDIMM +{ +namespace BPM +{ + +/* + * All of the various commands used for the BPM update. All commands can only be + * sent after write protection on the BPM has been disabled and the update magic + * values have been written to the BPM's magic registers. + * + * BSL: Bootstrap Loader commands + * BPM: Backup Power Module +*/ +enum COMMAND : uint8_t +{ + // A payload sent with this command will be interpreted and processed by the + // NVDIMM module. + BPM_LOCAL = 0xFF, + + /* + * These are LOCAL commands. These commands MUST be sent only outside of BSL + * BSL mode and must be paired with the BSP command BPM_LOCAL. Otherwise, + * unpredicatable errors will occur. + * + * When using issueCommand() for these commands they should always be sent + * with a 0 ms delay. This will ensure that the response packet is not + * checked from the BPM. Since these are processed by the NVDIMM it makes + * no sense to get a response from the BPM and attempting to do so will + * cause errors. + */ + BCL_ENTER_BSL_MODE = 0x01, + BCL_IS_BSL_MODE = 0x02, + BCL_WRITE_REG = 0x03, + BCL_START_UPDATE = 0x04, + BCL_END_UPDATE = 0x05, + BCL_IS_UPDATE_IN_PROGRESS = 0x06, + + // A payload sent with this command will be sent transparently to the BPM. + // This command must only be used while the BPM is in BSL mode. 
+ BPM_PASSTHROUGH = 0xFE, + + /* + * These are PASSTHROUGH commands. These commands MUST be sent only while in + * BSL mode and must be paired with BSP command BPM_PASSTHROUGH. Otherwise, + * unpredicatable errors will occur. + */ + // Writes a block of data to the BPM. + // Delay 1ms (default) + BSL_RX_DATA_BLOCK = 0x10, + // Unlocks the BPM that is in BSL mode so that updates can occur. + // Delay 1ms (default) + BSL_RX_PASSWORD = 0x11, + // Erases 128 bytes at the given address offset. + // WARNING: Due to BSL memory limitations, BSL cannot verify the address + // is a valid config segment address offset and will blindly + // erase 128 bytes of data starting at that offset. If an invalid + // address is sent then the BPM will be bricked in a very + // unpredicatable/unrecoverable way. + // Delay 250ms + BSL_ERASE_SEGMENT = 0x12, + // Unknown, unused. + // Delay 0ms + BSL_TOGGLE_INFO = 0x13, + // Unknown, unused. + // Delay 1ms (default) + BSL_ERASE_BLOCK = 0x14, + // Erases the full firmware section on the BPM. The start of the firmware + // address must be supplied with this command. + // WARNING: Due to BSL memory limitations, BSL cannot verify the address + // is a valid firmware address offset and will blindly erase 128 + // data starting at that offset. If an invalid address is sent + // then the BPM will be bricked in a very + // unpredicatable/unrecoverable way. + // Delay 250ms + BSL_MASS_ERASE = 0x15, + // Sends the command to the BPM to perform the final CRC check on the + // firmware written to the BPM. If the CRC check doesn't match the expected + // CRC in the flash image then the firmware will not load on the BPM and it + // will remain in BSL mode until new firmware is loaded onto it. + // + // Delay 0 + // The response packet must be checked externally from the issueCommand() + // function because the response packet returned from this command is unique + // to this command and will return the results of this command. 
For more + // info see the checkFirmwareCrc() function description, implementation, and + // the COMMAND_BSL_CRC_CHECK_RESPONSE_CODES enum. + BSL_CRC_CHECK = 0x16, + // Unknown, unused. + // Delay 1ms (default) + BSL_LOAD_PC = 0x17, + // Unknown, unused. + // Delay 1ms (default) + BSL_TX_DATA_BLOCK = 0x18, + // Checks the Bootstrap Loader mode version on the BPM. Depending on this + // version, some parts of the update procedure may have changed. + // + // Delay 0ms + // The response packet must be checked externally from the issueCommand() + // function because the response packet returned from this command is unique + // to this command and will return the results of this command. For more + // info see the readBslVersion() function description and implementation. + BSL_TX_BSL_VERSION = 0x19, + // Unknown, unused. + // Delay 1ms (default) + BSL_TX_BUFFER_SIZE = 0x1A, + // Unknown, unused. + // Delay 1ms (default) + BSL_RX_DATA_BLOCK_FAST = 0x1B, + // Resets the BPM and exits BSL mode. + // Delay 0ms + // Never check for a response packet from the BPM after sending the reset + // command because the BPM may not be back up and if it is it will not be + // in BSL mode anymore. If the response packet is checked then errors will + // occur. + BSL_RESET_DEVICE = 0x1C, + // Verifies the block of data written to the BPM flash is identical to what + // was sent to it in a prior write. For more information see the + // verifyBlockWrite() description and implementation. + // + // Delay 0ms + // The response packet must be checked externally from the issueCommand() + // function because the response packet returned from this command is unique + // to this command and will return the results of this command. + BSL_VERIFY_BLOCK = 0x1D, +}; + +// These consts serve as reminders in the code for what was explained in the +// COMMAND enum. 
+const int NO_DELAY_NO_RESPONSE = 0; +const int NO_DELAY_EXTERNAL_RESPONSE = 0; +const int ERASE_SEGMENT_DELAY = 250; +const int ERASE_FIRMWARE_DELAY = 250; + +// These are the various response codes returned by the BPM after the +// BSL_CRC_CHECK command is sent at the end of the update procedure. +enum COMMAND_BSL_CRC_CHECK_RESPONSE_CODES : uint16_t +{ + // The updated firmware is set up with all necessary loading parameters to + // load and execute upon reset. + SUCCESSFUL_OPERATION = 0x00, + + // Error setting up the necessary loading parameters for the updated + // firmware image. + MEMORY_WRITE_CHECK_FAILED = 0x01, + + // The command was attempted without unlocking the BSL with the password. + BSL_LOCKED = 0x04, + + // Error setting up the necessary loading parameters for the updated + // firmware image. + WRITE_FORBIDDEN = 0x06, + + // The checksum validation of the updated firmware image failed. The + // calculated checksum doesn't match the checksum data provided @FF7A in the + // firmware image file. + VERIFY_MISMATCH = 0x09, + + // The firmware image start address given for the command is wrong. + PARAMETER_ERROR = 0x0A, + + // Firmware image file used for the update doesn't have the checksum data + // defined @FF7A + MAIN_FW_NOT_SUPPORT_CRC_CHECK = 0x0B, +}; + +// BSL versions that this code supports. +const uint8_t BSL_VERSION_1_4 = 0x14; + +// The operator types for the BPM_CMD_STATUS register +enum COMMAND_STATUS_REGISTER_OP_TYPES : uint8_t +{ + NOP = 0x00, + READ = 0x01, + WRITE = 0x02, + NO_TRASFER = 0x03, +}; + +// Used to overlay onto the LID image +struct firmware_image_block +{ + // The block size is the sizeof(iv_addressOffset) plus sizeof(iv_data). + uint8_t iv_blockSize; + + // The address offset where the first byte in iv_data came from in the + // firmware image. + uint16_t iv_addressOffset; + + // A variable sized array of firmware data. 
The size of which is always + // iv_blockSize - sizeof(iv_addressOffset) and the max this can be is + // MAX_PAYLOAD_SIZE. + char iv_data[0]; + +} PACKED; + +typedef firmware_image_block firmware_image_block_t; + + +// Used to overlay onto the LID image +struct config_image_fragment +{ + // The fragment size is the size of iv_data. + uint8_t iv_fragmentSize; + + // The offset where the first byte in iv_data should begin overwriting the + // BPM config data in the BPM configuration segment dump buffer. + uint16_t iv_offset; + + // A variable sized array of config segment data. + char iv_data[0]; + +} PACKED; + +typedef config_image_fragment config_image_fragment_t; + + +/* Max payload size is 26 bytes + * 4 bytes: header + * 1 byte: sync byte + * 1 byte: command + * 1 byte: header size + data size + * 1 byte: header size + data size + * 2 bytes: address + * 2 bytes: extra + * 16 bytes: data + * 2 bytes: CRC + */ +constexpr size_t MAX_PAYLOAD_SIZE = 26; + +// Max number of bytes data section of payload can be. +constexpr size_t MAX_PAYLOAD_DATA_SIZE = 16; + +// Number of bytes for header, address, extra, and CRC +constexpr size_t MAX_PAYLOAD_OTHER_DATA_SIZE = 10; + +// Number of bytes for the header. +constexpr uint8_t PAYLOAD_HEADER_SIZE = 4; + +// Indices of where to find certain data within a constructed payload. +// These indices have been subtracted by 1 from the given payload format because +// after a payload is constructed the sync byte is removed from the front. +constexpr uint8_t PAYLOAD_COMMAND_INDEX = 0; +constexpr uint8_t PAYLOAD_ADDRESS_START_INDEX = 3; +constexpr uint8_t PAYLOAD_DATA_START_INDEX = 7; +constexpr uint8_t PAYLOAD_HEADER_DATA_LENGTH_INDEX = 1; + +// The sync byte that must always be at the front of a BPM payload. This is used +// to calculate the CRC of the payload and then removed because the nvdimm +// automatically sends the sync byte ahead of the payload. 
+constexpr uint8_t SYNC_BYTE = 0x80; +constexpr uint8_t SYNC_BYTE_SIZE = sizeof(uint8_t); + +// Maximum size of any segment in the config data section +constexpr size_t SEGMENT_SIZE = 128; + +// Maximum size of the config data section. +constexpr size_t ALL_SEGMENTS_SIZE = 512; + +// Number of magic registers for the BPM +constexpr size_t NUM_MAGIC_REGISTERS = 2; + +// These are the production magic values for the BPM that should be written in +// BPM_MAGIC_REG1 and BPM_MAGIC_REG2 respectively. +const uint8_t PRODUCTION_MAGIC_VALUES[NUM_MAGIC_REGISTERS] = {0x55, 0xAA}; +// These magic values to enable nvdimm-bpm interface. They must be written to +// the magic registers BEFORE writing flash updates to the BPM in BSL mode. +const uint8_t UPDATE_MODE_MAGIC_VALUES[NUM_MAGIC_REGISTERS] = {0xB0, 0xDA}; +// These are the segment read magic values that allow dumping of the segment +// data from the BPM. +const uint8_t SEGMENT_READ_MAGIC_VALUES[NUM_MAGIC_REGISTERS] = {0xBA, 0xAB}; + +typedef std::vector<uint8_t> payload_t; + + +/** + * @brief BPM_CMD_STATUS register bits + */ +struct command_status_register_bits +{ + uint8_t Abort_Request : 1; // Bit 7 + uint8_t Abort_Acknowledge : 1; // Bit 6 + uint8_t Reserved1 : 1; // Bit 5 + uint8_t Reserved2 : 1; // Bit 4 + uint8_t Error_Flag : 1; // Bit 3 + uint8_t Bsp_Cmd_In_Progress : 1; // Bit 2 + uint8_t Operator_Type : 2; // Bit 1-0 +} PACKED; + +/** + * @brief Union simplifying manipulation of REG_CMD_STATUS value + */ +union command_status_register_union +{ + uint8_t value; + command_status_register_bits bits; + + /** + * @brief Constructor + */ + command_status_register_union() + : value(0) + {} + +} PACKED; + +typedef command_status_register_union command_status_register_t; + +class BpmFirmwareLidImage +{ +public: + + /** + * @brief Constructor that sets access to LID information + * + * @param[in] i_lidImageAddr virtual address where LID was loaded + * @param[in] i_size size of the loaded LID + */ + BpmFirmwareLidImage(void 
* const i_lidImageAddr, size_t i_size); + + /** + * @brief Returns the version of the firmware binary as a uint16_t + * + * @return uint16_t version of the firmware image as MMmm. + * MM = major version, mm = minor. + */ + uint16_t getVersion() const; + + /** + * @brief Returns the number of blocks in the LID image. + * + */ + uint16_t getNumberOfBlocks() const; + + /** + * @brief Returns a pointer to the first block in LID image. + */ + void const * getFirstBlock() const; + + /* Layout of the BPM Firmware image + * Byte 1: Major version number (MM) + * Byte 2: Minor version number (mm) + * Byte 3-4: N number of blocks in the file (NN NN) + * Byte 5-EOF: Blocks of the form: + * BLOCK_SIZE Byte 1: X number of bytes in block excluding + * this byte. (XX) + * ADDRESS_OFFSET Byte 2-3: Original address offset of the + * first data byte. (AD DR) + * DATA_BYTES Byte 4-X: Firmware data bytes (DD) + * + * Example file: + * 01 03 00 01 06 80 00 6a 14 31 80 + * MM mm NN NN XX AD DR DD DD DD DD + */ + typedef struct firmware_image_header + { + uint8_t iv_versionMajor; + uint8_t iv_versionMinor; + uint16_t iv_numberOfBlocks; + } firmware_image_header_t; + +private: + + // Pointer to the LID image allocated outside of the class + void * const iv_lidImage; + + // The size of the LID image. + size_t iv_lidImageSize; +}; + + +class BpmConfigLidImage +{ +public: + + /** + * @brief Constructor that sets access to LID information + * + * @param[in] i_lidImageAddr virtual address where LID was loaded + * @param[in] i_size size of the loaded LID + */ + BpmConfigLidImage(void * const i_lidImageAddr, size_t i_size); + + /** + * @brief Returns the version of the config binary as a uint16_t. There isn't + * a way to check the version of the config data on the BPM but the + * config binary still has the version of the flash image it + * originally came from. + * + * @return uint16_t version of the firmware image as MMmm. + * MM = major version, mm = minor. 
+ */ + uint16_t getVersion() const; + + /** + * @brief Returns the number of fragments in the LID image. + * + */ + uint16_t getNumberOfFragments() const; + + /** + * @brief Returns a pointer to the first fragment in LID image. + */ + void const * getFirstFragment() const; + + /* The binary will be organized in the following way: + * Byte 1: Major version number (MM) + * Byte 2: Minor version number (mm) + * Byte 3: N number of fragments in the file (NN) + * Byte 4-EOF: Fragments of the form: + * FRAGMENT_SIZE Byte 1: X number of bytes in fragment data + * section. (XX) + * INDEX_OFFSET Byte 2-3: Each BPM's config section is unique + * to itself. So, during the update + * the contents of a BPM's config data + * will be dumped into a buffer. + * These two bytes will be used as an + * offset into that buffer from which + * overwritting will take place. + * (IN DX) + * DATA_BYTES Byte 4-X: Fragment data bytes to be written + * at the INDEX_OFFSET in the dumped + * config data buffer. (DD) + * + * Example file output: + * 01 05 01 04 01 28 6a 14 31 80 + * MM mm NN XX IN DX DD DD DD DD + */ + typedef struct config_image_header + { + uint8_t iv_versionMajor; + uint8_t iv_versionMinor; + uint16_t iv_numberOfFragments; + } config_image_header_t; + +private: + + // Pointer to the LID image allocated outside of the class + void * const iv_lidImage; + + // The size of the LID image. + size_t iv_lidImageSize; +}; + +class Bpm +{ + /* + * The Bpm can either be in Bootstrap Loader (BSL) mode or not. Many of + * member functions utilize BSL mode for the update procedure and must + * therefore be in BSL mode to succeed. Other functions perform operations + * that will not work in BSL mode since that mode is strictly for updating + * the device and turns of some functionality while in that mode. The "mode" + * the BPM must be in is given in the function brief description. 
+ */ +public: + + + explicit Bpm(const TARGETING::TargetHandle_t i_nvdimm); + + // Force User to supply a nvdimm target. + Bpm() = delete; + + /** + * @brief Runs the BPM firmware update using the given image. + * + * @param[in] i_fwImage The BPM firmware image. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t runUpdate(BpmFirmwareLidImage i_fwImage, + BpmConfigLidImage i_configImage); + + /** + * @brief At most, one full update retry should occur in some + * circumstances. If one of those occurrences happens then the + * member iv_attemptAnotherUpdate will be set to true. Otherwise, it + * will remain false. + * + * @return bool true if another update should be attempted. + * Otherwise, false. + */ + bool attemptAnotherUpdate(); + + /** + * @brief Returns if an update has been attempted on this BPM. + * + * @return bool true if an update has been attempted before. + * Otherwise, false. + */ + bool hasAttemptedUpdate(); + + /** + * @brief returns the nvdimm that is associated with this BPM. + */ + const TARGETING::TargetHandle_t getNvdimm(); + +private: + + // The nvdimm whose battery firmware will be updated. + const TARGETING::TargetHandle_t iv_nvdimm; + + // The Bootstrap Loader version of the BPM + uint8_t iv_bslVersion; + + // The firmware address for the BPM image can be either 0x8000 or 0xA000. + // This member will keep track of which one it is. + uint16_t iv_firmwareStartAddress; + + // Keeps track of if the update should be attempted again. + bool iv_attemptAnotherUpdate; + + // Buffers for the segment data in case another update attempt is needed. + // If the first update fails there won't be any running firmware on the + // device which is required to dump the segment data. + uint8_t iv_segmentD[SEGMENT_SIZE]; + uint8_t iv_segmentB[SEGMENT_SIZE]; + + // Keeps track if the segments have been merged with the flash image data + // yet. 
+ bool iv_segmentDMerged; + bool iv_segmentBMerged; + + // Keeps track of if an update has been attempted at least once. + bool iv_updateAttempted; + + /** + * @brief Determines if another update attempt should occur for this BPM. + */ + void setAttemptAnotherUpdate(); + + /** + * @brief Gets the BSL version from the BPM and sets the iv_bslVersion + * member. Only needs to be called once. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t readBslVersion(); + + /** + * @brief Gets the Firmware version from the BPM + * + * @param[out] o_fwVersion The firmware version currently on the BPM. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t getFwVersion(uint16_t & o_fwVersion) const; + + /** + * @brief This function issues a command to the BPM using a payload as the + * means of sending the command. + * + * @param[in] i_command The BSP command to send to the BPM. + * @param[in] i_payload The payload to write to the + * BPM_REG_PAYLOAD_START register. + * @param[in] i_opType The operation type of the command. Must be one + * of the COMMAND_STATUS_REGISTER_OP_TYPES + * + * @param[in] i_msDelay How long to wait before the response from the + * BPM should be checked. Default 1 ms. If a delay + * of 0 ms is given then the response will not be + * read and it is the caller's responsibilty to + * check the response status. See COMMAND enum for + * required delays. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t issueCommand(uint8_t i_command, + payload_t i_payload, + uint8_t i_opType, + int i_msDelay = 1); + + /** + * @brief This function issues a BSP command to the BPM by setting up a + * payload containing only that command and then calling the + * issueCommand function that accepts a payload as an argument. 
+ * + * NOTE: Since the BSP command is not a BSL command, it doesn't need + * to be formatted as a BSL payload but it still must be written to + * the BPM_REG_PAYLOAD_START register. + * + * @param[in] i_bspCommand The BSP command to send to the BPM. + * @param[in] i_command The BCL command to be written to the + * BPM_REG_PAYLOAD_START register. Must be one + * of the BCL_ commands. + * @param[in] i_opType The operation type of the BSP command. Must + * be a COMMAND_STATUS_REGISTER_OP_TYPES + * + * @param[in] i_msDelay How long to wait before the response from the + * BPM should be checked. Default 1 ms. If a delay + * of 0 ms is given then the response will not be + * read and it is the caller's responsibilty to + * check the response status. See COMMAND enum for + * required delays. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t issueCommand(uint8_t i_bspCommand, + uint8_t i_command, + uint8_t i_opType, + int i_msDelay = 1); + + /** + * @brief This function checks if the BPM has entered update mode + * + * @return errlHndl_t nullptr on success. + * Otherwise, pointer to an errlEntry. + */ + errlHndl_t inUpdateMode(); + + /** + * @brief Send the command to the BPM to enter update mode + * + * @return errlHndl_t nullptr if no errors occurred during command + * execution. Otherwise, pointer to an errlEntry. + */ + errlHndl_t enterUpdateMode(); + + /** + * @brief Send the command to the BPM to exit update mode + * + * @return errlHndl_t nullptr if no errors occurred during command + * execution. Otherwise, pointer to an errlEntry. + */ + errlHndl_t exitUpdateMode(); + + /** + * @brief Executes the firmware portion of the BPM update. + * + * @param[in] i_image The BPM firmware LID image to apply to the BPM. + * + * @return errlHndl_t nullptr if no errors occurred. + * Otherwise, pointer to an errlEntry. 
+ */ + errlHndl_t updateFirmware(BpmFirmwareLidImage i_image); + + /** + * @brief Helper function that executes the firmware portion of the BPM + * update by calling all necessary functions in order. + * + * @param[in] i_image The BPM firmware LID image to apply to the BPM. + * + * @return errlHndl_t nullptr if no errors occurred. + * Otherwise, pointer to an errlEntry. + */ + errlHndl_t runFirmwareUpdates(BpmFirmwareLidImage i_image); + + /** + * @brief Executes the config portion of the BPM update. + * + * @return errlHndl_t nullptr on success. Otherwise, an Error. + */ + errlHndl_t updateConfig(); + + /** + * @brief Helper function that executes the config portion of the BPM + * update by calling all necessary functions in order. + * + * @param[in] i_image The BPM config LID image to apply to the BPM. + * + * @return errlHndl_t nullptr on success. Otherwise, an Error. + */ + errlHndl_t runConfigUpdates(BpmConfigLidImage i_image); + + /** + * @brief Commands the BPM to enter BSL mode to allow for BSL commands to be + * executed. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t enterBootstrapLoaderMode(); + + /** + * @brief Creates a valid BSL payload given a firmware_image_block_t. + * + * @param[out] o_payload The BSL payload + * @param[in] i_block A pointer to a firmware image block. + * @param[in] i_command The BSL command to be included with the payload + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t setupPayload(payload_t & o_payload, + const firmware_image_block_t * i_block, + uint8_t i_command); + + /** + * @brief Creates a valid BSL payload given a BSL command, address, and + * optionally data to include with the command. This function is used + * to create firmware_image_block_t objects which are then passed + * onto the version of setupPayload that turns them into payloads. 
+ * + * @param[out] o_payload The BSL payload + * @param[in] i_command The BSL command to be included with the payload + * @param[in] i_address The address to execute the command from. This + * will be zero or the address to execute the + * command from. + * @param[in] i_data The array of data to be included with the BSL + * command. Default nullptr. + * @param[in] i_length Length of the i_data array parameter. Default 0. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t setupPayload(payload_t & o_payload, + uint8_t i_command, + uint16_t i_address, + const uint8_t i_data[] = nullptr, + size_t i_length = 0); + + /** + * @brief This function unlocks the BPM. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t unlockDevice(); + + /** + * @brief This function will send the command to reset the BPM. This will + * exit BSL mode if the BPM was in that mode. + * + * @return errlHndl_t nullptr on success. Otherwise, pointer to an + * errlEntry. + */ + errlHndl_t resetDevice(); + + /** + * @brief Write to the BPM register via the SCAP registers + * + * @param[in] i_reg The BPM register to write to. + * + * @param[in] i_data The data to write to the given register. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t writeViaScapRegister(uint8_t i_reg, uint8_t i_data); + + /** + * @brief Reads the BPM register via the SCAP registers + * + * @param[in] i_reg The BPM register to read from. + * + * @param[in/out] io_data The data that was in the given register. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t readViaScapRegister(uint8_t i_reg, uint8_t & io_data); + + /** + * @brief Disables write protection on the BPM by sending the password + * sequence to I2C_REG_PROTECT + * + * @return errlHndl_t nullptr on success. Otherwise, an error. 
+ */ + errlHndl_t disableWriteProtection(); + + /** + * @brief Many operations performed on the BPM require the magic registers + * to have specific values written in them. This function acts as a + * helper to facilitate that process. + * + * NOTE: Write protection on the BPM must be disabled, otherwise + * this function will fail. + * + * @param[in] i_magicValues The pair of magic values to be written to + * BPM_MAGIC_REG1 and BPM_MAGIC_REG2 + * respectively. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t writeToMagicRegisters( + uint8_t const (&i_magicValues)[NUM_MAGIC_REGISTERS]); + + /** + * @brief Switches the page on the BPM to the given page. This function + * must be executed only after the segment read magic values have + * been written to the BPM's magic registers. + * + * @param[in] i_segmentCode The segment code that corresponds to the + * page to switch to on the BPM. + * + * @return errlHndl_t nullptr on success. Otherwise, an error + * + */ + errlHndl_t switchBpmPage(uint16_t i_segmentCode); + + /** + * @brief Dumps the given segment data from the BPM. CANNOT be in BSL mode. + * + * @param[in] i_segmentCode The segment code that corresponds to the + * segment to dump from the BPM. + * + * @param[out] o_buffer A pointer to the buffer to fill with segment + * data. Must be SEGMENT_SIZE in size. + * + * @return errlHndl_t nullptr on success. Otherwise, an error + * + */ + errlHndl_t dumpSegment(uint16_t i_segmentCode, + uint8_t (&o_buffer)[SEGMENT_SIZE]); + + /** + * @brief Merges the segment data dumped from the BPM with the segment data + * fragments present in the BpmConfigLidImage that correspond to the + * given segment code. + * + * @param[in] i_configImage The image that holds the fragments of + * segment data. + * + * @param[in] i_segmentCode The segment code that corresponds to the + * segment to dump from the BPM. + * + * @param[out] o_buffer The merged segment data for the BPM. 
+ * Must be SEGMENT_SIZE in length. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t mergeSegment(BpmConfigLidImage i_configImage, + uint16_t i_segmentCode, + uint8_t (&o_buffer)[SEGMENT_SIZE]); + + /** + * @brief Commands the BPM to erase the segment data on the BPM using the + * given segment code to tell it which to erase. + * The BPM must be in BSL mode for this function to work. + * + * @param[in] i_segmentCode The segment from the config data section to + * erase. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t eraseSegment(uint16_t i_segmentCode); + + /** + * @brief Writes the segment data from the buffer to the BPM using the + * given segment code to determine which segment the data belongs + * to. The BPM must be in BSL mode for this function to work. + * + * @param[in] i_buffer The segment data to write to the BPM. + * + * @param[in] i_segmentCode The segment from the config data section the + * data belongs to. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t writeSegment(uint8_t const (&i_buffer)[SEGMENT_SIZE], + uint16_t i_segmentCode); + + /** + * @brief Dumps segment D and B data from the BPM and merges it with the + * data from the config image to create the unique updated segments + * for this BPM. The BPM CANNOT be in BSL mode for this function to + * work because the data is dumped using SCAP registers. There must + * also be working firmware on the device otherwise this will fail. + * + * @param[in] i_configImage The config image that has the fragments to + * merge into the BPM's existing segment data. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t preprocessSegments(BpmConfigLidImage i_configImage); + + /** + * @brief Verifies that the data written into the flash on the BPM is what + * was sent by hostboot in a payload. 
+ * + * @param[in] i_payload The payload that was just sent to the BPM to + * be verified. + * + * @param[in] i_dataLength The length of the data section of the + * payload. + * + * @param[out] o_status The status code returned from the BPM. + * A status of 0 indicates success, all other + * values are a failure. + * + * @return errlHndl_t nullptr if no errors. Otherwise, an error. + */ + errlHndl_t verifyBlockWrite(payload_t i_payload, + uint8_t i_dataLength, + uint8_t & o_status); + + /** + * @brief Attempts a BSL_RX_DATA_BLOCK command up to three times by calling + * blockWriteRetry. + * + * @param[in] i_payload The payload containing the BSL_RX_DATA_BLOCK + * command and the data to be attempted to be + * written. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t blockWrite(payload_t i_payload); + + /** + * @brief Attempts a BSL_RX_DATA_BLOCK command up to three times. + * + * @param[in] i_payload The payload containing the BSL_RX_DATA_BLOCK + * command and the data to be attempted to be + * written. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t blockWriteRetry(payload_t i_payload); + + /** + * @brief A helper function used to wait for the command status bit to reset + * after a command is executed. + * + * @param[in] i_commandStatus The command status register union made + * by the caller to identify the type of + * command that was sent. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t waitForCommandStatusBitReset( + command_status_register_t i_commandStatus); + + errlHndl_t verifyGoodBpmState(); + + /** + * @brief Helper function for the SCAP register functions that will poll + * the busy bit in SCAP_STATUS until it is zero. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. 
+ */ + errlHndl_t waitForBusyBit(); + + /** + * @brief Starting with BSL version 1.4 it is necessary to check the CRC of + * the firmware image once it has been written to the BPM. If this + * is not done or fails to succeed then the firmware image will not + * be loaded and executed by the BPM. If the CRC check fails then + * the update must be attempted again. + * Must be in BSL mode. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t checkFirmwareCrc(); + + /** + * @brief After a command is sent to the BPM to request info from it this + * function processes the response and returns it to the caller. + * A response packet can only be received once per command sent to + * the BPM. Which means that the caller must resend the command + * again to get another response packet. Simply calling the function + * repeatedly will not work. BPM must be in BSL mode. + * + * @param[in] o_responseData The buffer to be filled with the + * response data from the BPM. + * + * @param[in] i_responseSize The size of the buffer to be filled. + * + * @return errlHndl_t nullptr on success. Otherwise, an error. + */ + errlHndl_t getResponse(uint8_t * o_responseData, + uint8_t i_responseSize); + + + /** + * @brief Helper function to handle two potential errors that might occur in a + * function that only returns a single error log. If the return error is + * not nullptr then the second error will be linked to it and committed + * if this is the final update attempt. Otherwise, it will be deleted + * since the update procedure will occur again and may be successful. + * If the return error is nullptr then the return error will point to + * the second's error and the second error will point to nullptr. + * + * @param[in/out] io_returnErrl A pointer to the error that would be + * returned by the function that called + * this one. If nullptr, then it will be + * set point to the secondary error and + * that error will become nullptr. 
+ * + * @param[in/out] io_secondErrl The secondary error that occurred which + * in addition to the usual returned error. + */ + void handleMultipleErrors(errlHndl_t& io_returnErrl, + errlHndl_t& io_secondErrl); + + /** + * @brief Calculates the CRC16 bytes for the BSL payload. This CRC differs + * from the NVDIMM CRC calculation in that the initial value is + * 0xFFFF instead of 0x0000. + * + * NOTE: To calculate a correct CRC for the BSL payload the SYNC_BYTE + * must be included in the payload despite the fact that it + * should be removed from the payload before sending to the BPM + * because the NVDIMM sends the SYNC_BYTE automatically. + * + * @param[in] i_ptr A pointer to the start of the data to calculate the + * CRC for. + * @param[in] i_size This size of the data pointed at by i_ptr. + * + * @return uint16_t The CRC bytes. + */ + uint16_t crc16_calc(const void* const i_ptr, int i_size); + + +}; + +typedef std::vector<Bpm> bpmList_t; + +/** + * @brief Runs the firmware and config updates on the list of BPMs given. + * + * @param[in] i_16gb_BPMs The list of BPMs sitting on 16gb NVDIMMs that + * potentially need to be updated. + * + * @param[in] i_32gb_BPMs The list of BPMs sitting on 32gb NVDIMMs that + * potentially need to be updated. + * + * @param[in] i_16gb_fwImage The firmware image associated with BPMs sitting + * on 16gb NVDIMMs. + * + * @param[in] i_32gb_fwImage The firmware image associated with BPMs sitting + * on 32gb NVDIMMs. + * + * @param[in] i_16gb_configImage The configuration data associated with BPMs + * sitting on 16gb NVDIMMs. + * + * @param[in] i_32gb_configImage The configuration data associated with BPMs + * sitting on 32gb NVDIMMs. 
+ * + */ +void runBpmUpdates(bpmList_t * const i_16gb_BPMs, + bpmList_t * const i_32gb_BPMs, + BpmFirmwareLidImage * const i_16gb_fwImage, + BpmFirmwareLidImage * const i_32gb_fwImage, + BpmConfigLidImage * const i_16gb_configImage, + BpmConfigLidImage * const i_32gb_configImage); + +}; // end of BPM namespace +}; // end of NVDIMM namespace + +#endif + diff --git a/src/usr/isteps/nvdimm/errlud_nvdimm.C b/src/usr/isteps/nvdimm/errlud_nvdimm.C index 743297b94..07afa187a 100644 --- a/src/usr/isteps/nvdimm/errlud_nvdimm.C +++ b/src/usr/isteps/nvdimm/errlud_nvdimm.C @@ -158,9 +158,53 @@ UdNvdimmParms::UdNvdimmParms( uint8_t i_opType, } //------------------------------------------------------------------------------ -UdNvdimmParms::~UdNvdimmParms() -{ +UdNvdimmParms::~UdNvdimmParms() = default; +//------------------------------------------------------------------------------ +// NVDIMM Dimm Operation Parameters and Errors +//------------------------------------------------------------------------------ +UdNvdimmOPParms::UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo ) +{ + // Version control for ErrorUD struct + iv_CompId = NVDIMM_COMP_ID; + iv_Version = 3; + iv_SubSection = NVDIMM_OP_PARAMETERS; + + //***** Memory Layout ***** + // 1 byte : MODULE_HEALTH + // 1 byte : MODULE_HEALTH_STATUS0 + // 1 byte : MODULE_HEALTH_STATUS1 + // 1 byte : CSAVE_STATUS + // 1 byte : CSAVE_INFO + // 1 byte : CSAVE_FAIL_INFO0 + // 1 byte : CSAVE_FAIL_INFO1 + // 1 byte : CSAVE_TIMEOUT_INFO0 + // 1 byte : CSAVE_TIMEOUT_INFO1 + // 1 byte : ERROR_THRESHOLD_STATUS + // 1 byte : NVDIMM_READY + // 1 byte : NVDIMM_CMD_STATUS0 + // 1 byte : ABORT_CMD_TIMEOUT + // 1 byte : ERASE_STATUS + // 1 byte : ERASE_FAIL_INFO + // 1 byte : ERASE_TIMEOUT0 + // 1 byte : ERASE_TIMEOUT1 + // 1 byte : SET_ES_POLICY_STATUS + // 1 byte : RESTORE_STATUS + // 1 byte : RESTORE_FAIL_INFO + // 1 byte : RESTORE_TIMEOUT0 + // 1 byte : RESTORE_TIMEOUT1 + // 1 byte : ARM_STATUS + // 1 byte : ARM_FAIL_INFO + // 1 byte : 
ARM_TIMEOUT0 + // 1 byte : ARM_TIMEOUT1 + // 1 byte : SET_EVENT_NOTIFICATION_STATUS + // 1 byte : ENCRYPTION_CONFIG_STATUS + + char * l_pBuf = reinterpret_cast<char *>( reallocUsrBuf(sizeof(i_RegInfo))); + memcpy(l_pBuf, &i_RegInfo, sizeof(i_RegInfo)); } +// Default the deconstructor +UdNvdimmOPParms::~UdNvdimmOPParms() = default; + } // end NVDIMM namespace diff --git a/src/usr/isteps/nvdimm/errlud_nvdimm.H b/src/usr/isteps/nvdimm/errlud_nvdimm.H index 55b5f9b20..2041da054 100644 --- a/src/usr/isteps/nvdimm/errlud_nvdimm.H +++ b/src/usr/isteps/nvdimm/errlud_nvdimm.H @@ -61,12 +61,37 @@ class UdNvdimmParms : public ERRORLOG::ErrlUserDetails */ virtual ~UdNvdimmParms(); - private: // Disabled - UdNvdimmParms(UdNvdimmParms &); - UdNvdimmParms & operator=(UdNvdimmParms &); + UdNvdimmParms(UdNvdimmParms &) = delete; + UdNvdimmParms & operator=(UdNvdimmParms &) = delete; }; -} // end NVDIMM namespace +/** + * @class UdNvdimmOPParms + * + * Adds NVDIMM information to an error log as user detail data + */ +class UdNvdimmOPParms : public ERRORLOG::ErrlUserDetails +{ + public: + /** + * @brief Constructor + * + * @param i_i2cInfo Miscellaneous Parameters + */ + UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo ); + + /** + * @brief Destructor + */ + virtual ~UdNvdimmOPParms(); + + // Disabled + UdNvdimmOPParms() = delete; + UdNvdimmOPParms(UdNvdimmOPParms &) = delete; + UdNvdimmOPParms & operator=(UdNvdimmOPParms &) = delete; +}; + +} // end of namespace NVDIMM #endif diff --git a/src/usr/isteps/nvdimm/nvdimm.C b/src/usr/isteps/nvdimm/nvdimm.C index 79d7b679d..e93271e5e 100644 --- a/src/usr/isteps/nvdimm/nvdimm.C +++ b/src/usr/isteps/nvdimm/nvdimm.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2019 */ +/* Contributors Listed Below - COPYRIGHT 2014,2020 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -28,6 +28,8 @@ #include <errl/errlentry.H> #include <errl/errlmanager.H> #include <errl/errludtarget.H> +#include <errl/errludlogregister.H> +#include <errl/errludstring.H> #include <targeting/common/commontargeting.H> #include <targeting/common/util.H> #include <targeting/common/utilFilter.H> @@ -36,33 +38,39 @@ #include <fapi2.H> #include <fapi2/plat_hwp_invoker.H> #include <lib/shared/nimbus_defaults.H> +#include <lib/ccs/ccs_nimbus.H> #include <lib/dimm/ddr4/nvdimm_utils.H> #include <lib/mc/port.H> #include <isteps/nvdimm/nvdimmreasoncodes.H> +#include "errlud_nvdimm.H" +#include "nvdimmErrorLog.H" #include <isteps/nvdimm/nvdimm.H> #include <vpd/spdenums.H> +#include <secureboot/trustedbootif.H> +#include <targeting/common/targetUtil.H> +#ifdef __HOSTBOOT_RUNTIME +#include <runtime/hbrt_utilities.H> +#include <targeting/runtime/rt_targeting.H> +#else +#include <initservice/istepdispatcherif.H> +#endif using namespace TARGETING; using namespace DeviceFW; using namespace EEPROM; +using namespace ERRORLOG; trace_desc_t* g_trac_nvdimm = NULL; TRAC_INIT(&g_trac_nvdimm, NVDIMM_COMP_NAME, 2*KILOBYTE); // Easy macro replace for unit testing -#define TRACUCOMP(args...) TRACFCOMP(args) -//#define TRACUCOMP(args...) +//#define TRACUCOMP(args...) TRACFCOMP(args) +#define TRACUCOMP(args...) 
namespace NVDIMM { #define NUM_OFFSET 2 -#define NVDIMM_SET_USER_DATA_1(left_32_ops_id, right_32_huid) \ - TWO_UINT32_TO_UINT64(left_32_ops_id, right_32_huid) - -#define NVDIMM_SET_USER_DATA_2_TIMEOUT(left_32_polled, right_32_timeout) \ - NVDIMM_SET_USER_DATA_1(left_32_polled, right_32_timeout) - typedef struct ops_timeoutInfo{ const char * desc; @@ -83,6 +91,173 @@ constexpr ops_timeoutInfo_t timeoutInfoTable[] = {"CHARGE", {ES_CHARGE_TIMEOUT1, ES_CHARGE_TIMEOUT0}, CHARGE , MODULE_HEALTH_STATUS1, CHARGE_IN_PROGRESS}, }; +// Definition of ENCRYPTION_CONFIG_STATUS -- page 5 offset 0x20 +typedef union { + uint8_t whole; + struct + { + uint8_t reserved : 1; // [7] + uint8_t unsupported_field : 1; // [6] + uint8_t erase_pending : 1; // [5] + uint8_t encryption_unlocked : 1; // [4] + uint8_t encryption_enabled : 1; // [3] + uint8_t erase_key_present : 1; // [2] + uint8_t random_string_present : 1; // [1] + uint8_t encryption_supported : 1; // [0] + } PACKED; +} encryption_config_status_t; + +// Valid bits to check against (skips reserved and unsupported) +static constexpr uint8_t ENCRYPTION_STATUS_CHECK_MASK = 0x3F; +static constexpr uint8_t ENCRYPTION_STATUS_DISABLED = 0x01; +static constexpr uint8_t ENCRYPTION_STATUS_ENABLED = 0x1F; + +// NV_STATUS masks +static constexpr uint8_t NV_STATUS_OR_MASK = 0xFB; +static constexpr uint8_t NV_STATUS_AND_MASK = 0x04; +static constexpr uint8_t NV_STATUS_UNPROTECTED_SET = 0x01; +static constexpr uint8_t NV_STATUS_UNPROTECTED_CLR = 0xFE; +static constexpr uint8_t NV_STATUS_ENCRYPTION_SET = 0x10; +static constexpr uint8_t NV_STATUS_ENCRYPTION_CLR = 0xEF; +static constexpr uint8_t NV_STATUS_ERASE_VERIFY_SET = 0x20; +static constexpr uint8_t NV_STATUS_ERASE_VERIFY_CLR = 0xDF; +static constexpr uint8_t NV_STATUS_POSSIBLY_UNPROTECTED_SET = 0x40; + +// NVDIMM key consts +static constexpr size_t NUM_KEYS_IN_ATTR = 3; +static constexpr size_t MAX_TPM_SIZE = 34; +static constexpr uint8_t KEY_TERMINATE_BYTE = 0x00; +static constexpr uint8_t 
KEY_ABORT_BYTE = 0xFF; + +// NVDIMM CSAVE_FAIL_INFO1 Bit mask +// Currently only bits 1:6 need to be checked during init +static constexpr uint8_t CSAVE_FAIL_BITS_MASK = 0x7E; + +// LOG PAGE INFO +static constexpr size_t VENDOR_LOG_UNIT_SIZE = 256; +static constexpr size_t VENDOR_LOG_BLOCK_SIZE = 32; +static constexpr size_t VENDOR_BLOCK_DATA_BYTES = 32; + +// TYPED_BLOCK_DATA +static constexpr uint8_t VENDOR_DATA_TYPE = 0x04; +static constexpr uint8_t VENDOR_DEFAULT = 0x00; +static constexpr uint8_t FIRMWARE_IMAGE_DATA = 0x02; + +// Commands to OPERATIONAL_UNIT_OPS_CMD +static constexpr uint8_t GET_OPERATIONAL_UNIT = 0x01; +static constexpr uint8_t GENERATE_OPERATIONAL_UNIT_CKSUM = 0x08; + +static constexpr uint8_t MSBIT_SET_MASK = 0x80; +static constexpr uint8_t MSBIT_CLR_MASK = 0x7F; +static constexpr uint8_t OPERATION_SLEEP_SECONDS = 0x1; + +// Bit mask for checking the fw slot running +static constexpr uint8_t RUNNING_FW_SLOT = 0xF0; + +// NOTE: If the ARM_MAX_RETRY_COUNT is greater than 1 then +// previous error logs may be lost and not reported +static constexpr size_t ARM_MAX_RETRY_COUNT = 1; +static constexpr uint8_t FW_OPS_UPDATE = 0x04; + +// Secure erase verify operations +static constexpr uint8_t ERASE_VERIFY_CLEAR = 0x00; +static constexpr uint8_t ERASE_VERIFY_START = 0xC0; +static constexpr uint8_t ERASE_VERIFY_TRIGGER = 0x80; + +#ifndef __HOSTBOOT_RUNTIME +// Warning thresholds +static constexpr uint8_t THRESHOLD_ES_LIFETIME = 0x07; // 7% +static constexpr uint8_t THRESHOLD_NVM_LIFETIME = 0x31; // 49% + +// 12 bit fixed point temperature in celsius degrees +// with following bit format: +// [15:13]Reserved +// [12]Sign 0 = positive, 1 = negative The value of 0 C should be expressed as a positive value +// [11]128 [10]64 [9]32 [8]16 [7]8 [6]4 [5]2 [4]1 [3]0.5 [2]0.25 +// [1]0.125 Optional for temperature reporting fields; not used for temperature threshold fields +// [0]0.0625 Optional for temperature reporting fields; not used for temperature 
threshold fields +static constexpr uint8_t THRESHOLD_ES_TEMP_HIGH_1 = 0x03; // 52.5 C +static constexpr uint8_t THRESHOLD_ES_TEMP_HIGH_0 = 0x48; // 52.5 C +static constexpr uint8_t THRESHOLD_ES_TEMP_LOW_1 = 0x00; // 2.5 C +static constexpr uint8_t THRESHOLD_ES_TEMP_LOW_0 = 0x28; // 2.5 C +#endif + +// Definition of ENCRYPTION_KEY_VALIDATION -- page 5 offset 0x2A +typedef union { + uint8_t whole; + struct + { + uint8_t reserved : 5; // [7:3] + uint8_t keys_validated : 1; // [2] + uint8_t access_key_valid : 1; // [1] + uint8_t erase_key_valid : 1; // [0] + } PACKED; +} encryption_key_validation_t; + +/** + * @brief Utility function to send the value of + * ATTR_NVDIMM_ARMED to the FSP + */ +void send_ATTR_NVDIMM_ARMED( Target* i_nvdimm, + ATTR_NVDIMM_ARMED_type& i_val ); + +/** + * @brief Utility function to set ATTR_NVDIMM_ENCRYPTION_KEYS_FW + * and send the value to the FSP + */ +void set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW( + ATTR_NVDIMM_ENCRYPTION_KEYS_FW_typeStdArr& i_val ) +{ + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW: no TopLevelTarget"); + + l_sys->setAttrFromStdArr + <ATTR_NVDIMM_ENCRYPTION_KEYS_FW>(i_val); + +#ifdef __HOSTBOOT_RUNTIME + errlHndl_t l_err = nullptr; + + // Send attr to HWSV if at runtime + AttributeTank::Attribute l_attr = {}; + if( !makeAttributeStdArr<ATTR_NVDIMM_ENCRYPTION_KEYS_FW> + (l_sys, l_attr) ) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW() Could not create Attribute"); + /*@ + *@errortype + *@reasoncode NVDIMM_CANNOT_MAKE_ATTRIBUTE + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid SET_ATTR_NVDIMM_ENCRYPTION_KEYS_FW + *@devdesc Couldn't create an Attribute to send the data + * to the FSP + *@custdesc NVDIMM encryption error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + SET_ATTR_NVDIMM_ENCRYPTION_KEYS_FW, + NVDIMM_CANNOT_MAKE_ATTRIBUTE, + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT ); + 
l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else + { + std::vector<TARGETING::AttributeTank::Attribute> l_attrList; + l_attrList.push_back(l_attr); + l_err = sendAttributes( l_attrList ); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW() Error sending ATTR_NVDIMM_ENCRYPTION_KEYS_FW down to FSP"); + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + } +#endif //__HOSTBOOT_RUNTIME + +} + /** * @brief Wrapper to call deviceOp to read the NV controller via I2C * @@ -103,8 +278,8 @@ errlHndl_t nvdimmReadReg(Target* i_nvdimm, uint8_t & o_data, const bool page_verify) { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"NVDIMM Read HUID %X, addr 0x%X", - TARGETING::get_huid(i_nvdimm), i_addr); + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"NVDIMM Read HUID 0x%X, addr 0x%X", + get_huid(i_nvdimm), i_addr); errlHndl_t l_err = nullptr; size_t l_numBytes = 1; @@ -123,7 +298,7 @@ errlHndl_t nvdimmReadReg(Target* i_nvdimm, if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReadReg() nvdimm[%X] - failed to read the current page", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); break; } @@ -134,7 +309,7 @@ errlHndl_t nvdimmReadReg(Target* i_nvdimm, if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReadReg() nvdimm[%X] - failed to verify page", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); break; } } @@ -144,11 +319,16 @@ errlHndl_t nvdimmReadReg(Target* i_nvdimm, i_nvdimm, &o_data, l_numBytes, - DEVICE_NVDIMM_ADDRESS(l_reg_addr)); + DEVICE_NVDIMM_RAW_ADDRESS(l_reg_addr)); }while(0); - TRACUCOMP(g_trac_nvdimm, EXIT_MRK"NVDIMM Read HUID %X, page 0x%X, addr 0x%X = %X", - TARGETING::get_huid(i_nvdimm), l_reg_page, l_reg_addr, o_data); + if (l_err) + { + nvdimmAddPage4Regs(i_nvdimm,l_err); + } + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"NVDIMM Read HUID 0x%X, page 0x%X, addr 0x%X = 0x%X", + get_huid(i_nvdimm), l_reg_page, l_reg_addr, 
o_data); return l_err; } @@ -173,8 +353,8 @@ errlHndl_t nvdimmWriteReg(Target* i_nvdimm, uint8_t i_data, const bool page_verify) { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"NVDIMM Write HUID %X, addr 0x%X = %X", - TARGETING::get_huid(i_nvdimm), i_addr, i_data); + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"NVDIMM Write HUID 0x%X, addr 0x%X = 0x%X", + get_huid(i_nvdimm), i_addr, i_data); errlHndl_t l_err = nullptr; size_t l_numBytes = 1; @@ -193,7 +373,7 @@ errlHndl_t nvdimmWriteReg(Target* i_nvdimm, if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmWriteReg() nvdimm[%X] - failed to read the current page", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); break; } @@ -204,7 +384,7 @@ errlHndl_t nvdimmWriteReg(Target* i_nvdimm, if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmWriteReg() nvdimm[%X] - failed to verify page", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); break; } } @@ -214,11 +394,16 @@ errlHndl_t nvdimmWriteReg(Target* i_nvdimm, i_nvdimm, &i_data, l_numBytes, - DEVICE_NVDIMM_ADDRESS(l_reg_addr)); + DEVICE_NVDIMM_RAW_ADDRESS(l_reg_addr)); }while(0); - TRACUCOMP(g_trac_nvdimm, EXIT_MRK"NVDIMM Write HUID %X, page = 0x%X, addr 0x%X = %X", - TARGETING::get_huid(i_nvdimm), l_reg_page, l_reg_addr, i_data); + if (l_err) + { + nvdimmAddPage4Regs(i_nvdimm,l_err); + } + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"NVDIMM Write HUID 0x%X, page = 0x%X, addr 0x%X = 0x%X", + get_huid(i_nvdimm), l_reg_page, l_reg_addr, i_data); return l_err; } @@ -234,43 +419,48 @@ errlHndl_t nvdimmWriteReg(Target* i_nvdimm, void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmSetStatusFlag() HUID[%X], i_status_flag[%X]" - ,TARGETING::get_huid(i_nvdimm), i_status_flag); + ,get_huid(i_nvdimm), i_status_flag); - auto l_statusFlag = i_nvdimm->getAttr<TARGETING::ATTR_NV_STATUS_FLAG>(); + auto l_statusFlag = i_nvdimm->getAttr<ATTR_NV_STATUS_FLAG>(); switch(i_status_flag) { - // Make sure NSTD_VAL_PRSV (content preserved) is 
unset before setting NSTD_VAL_NOPRSV - // (data not preserved) or NSTD_ERR_NOPRSV (error preserving data) + // Make sure NSTD_VAL_RESTORED (content preserved) is unset before setting NSTD_VAL_ERASED + // (data not preserved) or NSTD_VAL_SR_FAILED (error preserving data) case NSTD_ERR: - case NSTD_VAL_NOPRSV: - case NSTD_ERR_NOPRSV: - l_statusFlag &= NSTD_VAL_PRSV_MASK; + case NSTD_VAL_ERASED: + case NSTD_VAL_SR_FAILED: + l_statusFlag &= NSTD_VAL_RESTORED_MASK; l_statusFlag |= i_status_flag; break; // If the content preserved(restore sucessfully), make sure - // NSTD_VAL_NOPRSV (not preserved) and NSTD_ERR_NOPRSV (error preserving) + // NSTD_VAL_ERASED (not preserved) and NSTD_VAL_SR_FAILED (error preserving) // are unset before setting this flag. - case NSTD_VAL_PRSV: - l_statusFlag &= (NSTD_VAL_NOPRSV_MASK & NSTD_ERR_NOPRSV_MASK); + case NSTD_VAL_RESTORED: + l_statusFlag &= (NSTD_VAL_ERASED_MASK & NSTD_VAL_SR_FAILED_MASK); l_statusFlag |= i_status_flag; break; - case NSTD_ERR_NOBKUP: + case NSTD_VAL_DISARMED: + l_statusFlag |= i_status_flag; + break; + + // Error detected but save/restore might work. May coexsit with other bits. 
+ case NSTD_ERR_VAL_SR: l_statusFlag |= i_status_flag; break; default: assert(0, "nvdimmSetStatusFlag() HUID[%X], i_status_flag[%X] invalid flag!", - TARGETING::get_huid(i_nvdimm), i_status_flag); + get_huid(i_nvdimm), i_status_flag); break; } - i_nvdimm->setAttr<TARGETING::ATTR_NV_STATUS_FLAG>(l_statusFlag); + i_nvdimm->setAttr<ATTR_NV_STATUS_FLAG>(l_statusFlag); TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmSetStatusFlag() HUID[%X], i_status_flag[%X]" - ,TARGETING::get_huid(i_nvdimm), i_status_flag); + ,get_huid(i_nvdimm), i_status_flag); } @@ -284,10 +474,11 @@ void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag) */ errlHndl_t nvdimmReady(Target *i_nvdimm) { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmReady() HUID[%X]",TARGETING::get_huid(i_nvdimm)); + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmReady() HUID[%X]",get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; - uint8_t l_data = 0x0; + nvdimm_reg_t l_RegInfo; + uint8_t l_data; uint8_t l_nvm_init_time = 0; size_t l_numBytes = 1; @@ -300,17 +491,17 @@ errlHndl_t nvdimmReady(Target *i_nvdimm) DEVICE_SPD_ADDRESS(SPD::NVM_INIT_TIME)); TRACUCOMP(g_trac_nvdimm, "nvdimmReady() HUID[%X] l_nvm_init_time = %u", - TARGETING::get_huid(i_nvdimm), l_nvm_init_time); + get_huid(i_nvdimm), l_nvm_init_time); if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReady() nvdimm[%X] - failed to retrieve NVM_INIT_TIME from SPD", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); break; } - // Convert to ms for polling - uint32_t l_nvm_init_time_ms = l_nvm_init_time * MS_PER_SEC; + // Convert to ms for polling and double the value to avoid edge condition + uint32_t l_nvm_init_time_ms = l_nvm_init_time * MS_PER_SEC * 2; uint32_t l_poll = 0; do @@ -320,7 +511,7 @@ errlHndl_t nvdimmReady(Target *i_nvdimm) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReady() nvdimm[%X] - error getting ready status[%d]", - TARGETING::get_huid(i_nvdimm), l_data); + get_huid(i_nvdimm), l_data); break; } @@ -336,8 +527,50 @@ errlHndl_t 
nvdimmReady(Target *i_nvdimm) if ((l_data != NV_READY) && !l_err) { + + // Collect available status registers for error log + do + { + // Read and save NVDIMM_READY for traces + l_err = nvdimmReadReg(i_nvdimm, NVDIMM_READY, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + break; + } + l_RegInfo.NVDimm_Ready = l_data; + + // Read and save MODULE_HEALTH for traces + l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + break; + } + l_RegInfo.Module_Health = l_data; + + // Read and save MODULE_HEALTH_STATUS0 for traces + l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH_STATUS0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + break; + } + l_RegInfo.Module_Health_Status0 = l_data; + + // Read and save MODULE_HEALTH_STATUS1 for traces + l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH_STATUS1, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + break; + } + l_RegInfo.Module_Health_Status1 = l_data; + + }while(0); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReady() nvdimm[%X] - nvdimm not ready[%d]", - TARGETING::get_huid(i_nvdimm), l_data); + get_huid(i_nvdimm), l_data); /*@ *@errortype *@reasoncode NVDIMM_NOT_READY @@ -350,26 +583,33 @@ errlHndl_t nvdimmReady(Target *i_nvdimm) * for host access. 
(userdata1 != 0xA5) *@custdesc NVDIMM not ready */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - NVDIMM_CHECK_READY, - NVDIMM_NOT_READY, - NVDIMM_SET_USER_DATA_1(l_data, TARGETING::get_huid(i_nvdimm)), - 0x0, - ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + NVDIMM_CHECK_READY, + NVDIMM_NOT_READY, + NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); - l_err->collectTrace(NVDIMM_COMP_NAME, 1024 ); + l_err->collectTrace(NVDIMM_COMP_NAME); // If nvdimm is not ready for access by now, this is // a failing indication on the NV controller - l_err->addPartCallout( i_nvdimm, - HWAS::NV_CONTROLLER_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH); + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + + // Add Register Traces to error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + nvdimmAddPage4Regs(i_nvdimm,l_err); + nvdimmAddVendorLog(i_nvdimm, l_err); } }while(0); TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmReady() HUID[%X] ready[%X]", - TARGETING::get_huid(i_nvdimm), l_data); + get_huid(i_nvdimm), l_data); return l_err; } @@ -386,7 +626,7 @@ errlHndl_t nvdimmReady(Target *i_nvdimm) */ errlHndl_t nvdimmResetController(Target *i_nvdimm) { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmResetController() HUID[%X]",TARGETING::get_huid(i_nvdimm)); + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmResetController() HUID[%X]",get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; do @@ -396,7 +636,7 @@ errlHndl_t nvdimmResetController(Target *i_nvdimm) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmResetController() nvdimm[%X] - error reseting the controller", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); break; } @@ -404,12 +644,17 @@ errlHndl_t nvdimmResetController(Target *i_nvdimm) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmResetController() nvdimm[%X] - not ready after reset.", - 
TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); } }while(0); - TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmResetController() HUID[%X]",TARGETING::get_huid(i_nvdimm)); + // Reset will lock encryption so unlock again + TargetHandleList l_nvdimmTargetList; + l_nvdimmTargetList.push_back(i_nvdimm); + nvdimm_encrypt_unlock(l_nvdimmTargetList); + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmResetController() HUID[%X]",get_huid(i_nvdimm)); return l_err; } @@ -436,8 +681,8 @@ errlHndl_t nvdimmPollStatus ( Target *i_nvdimm, bool l_done = false; // Get the timeout value for ops_id - assert(i_nvdimm->tryGetAttr<TARGETING::ATTR_NV_OPS_TIMEOUT_MSEC>(l_target_timeout_values), - "nvdimmPollStatus() HUID[%X], failed reading ATTR_NV_OPS_TIMEOUT_MSEC!", TARGETING::get_huid(i_nvdimm)); + assert(i_nvdimm->tryGetAttr<ATTR_NV_OPS_TIMEOUT_MSEC>(l_target_timeout_values), + "nvdimmPollStatus() HUID[%X], failed reading ATTR_NV_OPS_TIMEOUT_MSEC!", get_huid(i_nvdimm)); uint32_t l_timeout = l_target_timeout_values[i_ops_id]; do @@ -461,13 +706,13 @@ errlHndl_t nvdimmPollStatus ( Target *i_nvdimm, o_poll += OPS_POLL_TIME_MS; - } while (o_poll < l_timeout); + } while (o_poll <= l_timeout); if (!l_done && !l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmPollStatus() nvdimm[%X] - Status timed out ops_id[%d]", - TARGETING::get_huid(i_nvdimm), i_ops_id); + get_huid(i_nvdimm), i_ops_id); /*@ *@errortype *@reasoncode NVDIMM_STATUS_TIMEOUT @@ -481,20 +726,17 @@ errlHndl_t nvdimmPollStatus ( Target *i_nvdimm, * Refer to userdata1 for which operation it timed out. 
*@custdesc NVDIMM timed out */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, - NVDIMM_POLL_STATUS, - NVDIMM_STATUS_TIMEOUT, - NVDIMM_SET_USER_DATA_1(i_ops_id, TARGETING::get_huid(i_nvdimm)), - NVDIMM_SET_USER_DATA_2_TIMEOUT(o_poll, l_timeout), - ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); - - l_err->collectTrace(NVDIMM_COMP_NAME, 1024 ); - - // May have to move the error handling to the caller - // as different op could have different error severity - l_err->addPartCallout( i_nvdimm, - HWAS::NV_CONTROLLER_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH); + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_POLL_STATUS, + NVDIMM_STATUS_TIMEOUT, + NVDIMM_SET_USER_DATA_1(i_ops_id, get_huid(i_nvdimm)), + NVDIMM_SET_USER_DATA_2_TIMEOUT(o_poll, l_timeout), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddPage4Regs(i_nvdimm,l_err); + nvdimmAddVendorLog(i_nvdimm, l_err); } return l_err; @@ -516,14 +758,46 @@ errlHndl_t nvdimmPollBackupDone(Target* i_nvdimm, uint32_t &o_poll) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmPollBackupDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; + nvdimm_reg_t l_RegInfo = nvdimm_reg_t(); l_err = nvdimmPollStatus ( i_nvdimm, SAVE, o_poll); + if (l_err) + { + errlCommit(l_err, NVDIMM_COMP_ID); + + /*@ + *@errortype + *@reasoncode NVDIMM_BACKUP_TIMEOUT + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_POLL_BACKUP + *@userdata1[0:31] Related ops (0xff = NA) + *@userdata1[32:63] Target Huid + *@devdesc Encountered timeout while performing NVDIMM Restore operation + *@custdesc NVDIMM timed out + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_POLL_BACKUP, + NVDIMM_BACKUP_TIMEOUT, + NVDIMM_SET_USER_DATA_1(SAVE, TARGETING::get_huid(i_nvdimm)), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // Collect register 
data for FFDC Traces + nvdimmTraceRegs ( i_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(i_nvdimm,l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + } + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollBackupDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); return l_err; } @@ -543,18 +817,57 @@ errlHndl_t nvdimmPollRestoreDone(Target* i_nvdimm, uint32_t &o_poll) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmPollRestoreDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; + nvdimm_reg_t l_RegInfo = nvdimm_reg_t(); l_err = nvdimmPollStatus ( i_nvdimm, RESTORE, o_poll ); + if (l_err) + { + errlCommit(l_err, NVDIMM_COMP_ID); + + /*@ + *@errortype + *@reasoncode NVDIMM_RESTORE_TIMEOUT + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_POLL_RESTORE + *@userdata1[0:31] Related ops (0xff = NA) + *@userdata1[32:63] Target Huid + *@devdesc Encountered timeout while performing NVDIMM Restore operation + *@custdesc NVDIMM timed out + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_POLL_RESTORE, + NVDIMM_RESTORE_TIMEOUT, + NVDIMM_SET_USER_DATA_1(RESTORE, TARGETING::get_huid(i_nvdimm)), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // May have to move the error handling to the caller + // as different op could have different error severity + l_err->addPartCallout( i_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + // Collect register data for FFDC Traces + nvdimmTraceRegs ( i_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(i_nvdimm,l_err); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + } + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollRestoreDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); return l_err; } + /** * @brief This function polls the command 
status register for erase * completion (does not indicate success or fail) @@ -570,14 +883,39 @@ errlHndl_t nvdimmPollEraseDone(Target* i_nvdimm, uint32_t &o_poll) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmPollEraseDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; - l_err = nvdimmPollStatus ( i_nvdimm, ERASE, o_poll); + l_err = nvdimmPollStatus( i_nvdimm, ERASE, o_poll); + + if (l_err) + { + errlCommit(l_err, NVDIMM_COMP_ID); + + /*@ + *@errortype + *@reasoncode NVDIMM_ERASE_TIMEOUT + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_POLL_ERASE + *@userdata1[0:31] Related ops (0xff = NA) + *@userdata1[32:63] Target Huid + *@devdesc Encountered timeout while performing NVDIMM Restore operation + *@custdesc NVDIMM timed out + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_POLL_ERASE, + NVDIMM_ERASE_TIMEOUT, + NVDIMM_SET_USER_DATA_1(ERASE, TARGETING::get_huid(i_nvdimm)), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + nvdimmAddPage4Regs(i_nvdimm,l_err); + nvdimmAddVendorLog(i_nvdimm, l_err); + } TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollEraseDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); return l_err; } @@ -598,14 +936,18 @@ errlHndl_t nvdimmPollESChargeStatus(Target* i_nvdimm, uint32_t &o_poll) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmPollESChargeDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; - l_err = nvdimmPollStatus ( i_nvdimm, CHARGE, o_poll ); + l_err = nvdimmPollStatus( i_nvdimm, CHARGE, o_poll ); + + l_err->addPartCallout( i_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollESChargeDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); return l_err; } @@ -623,7 +965,7 @@ errlHndl_t nvdimmPollESChargeStatus(Target* i_nvdimm, errlHndl_t 
nvdimmGetRestoreValid(Target* i_nvdimm, uint8_t & o_rstrValid) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmGetRestoreValid() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; @@ -631,11 +973,11 @@ errlHndl_t nvdimmGetRestoreValid(Target* i_nvdimm, uint8_t & o_rstrValid) if (l_err){ TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X], Error getting restore status!", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); } TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmGetRestoreValid() nvdimm[%X], restore_status[%x],", - TARGETING::get_huid(i_nvdimm), o_rstrValid); + get_huid(i_nvdimm), o_rstrValid); return l_err; } @@ -651,10 +993,11 @@ errlHndl_t nvdimmGetRestoreValid(Target* i_nvdimm, uint8_t & o_rstrValid) errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmSetESPolicy() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; - uint8_t l_data; + uint8_t l_data = 0x0; + nvdimm_reg_t l_RegInfo = nvdimm_reg_t(); do { @@ -663,9 +1006,9 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm) if (l_err) { - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOBKUP); + nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_DISARMED); TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmSetESPolicy() nvdimm[%X]" - "failed to write ES register!",TARGETING::get_huid(i_nvdimm)); + "failed to write ES register!",get_huid(i_nvdimm)); break; } @@ -677,16 +1020,16 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm) if (l_err) { - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOBKUP); + nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_DISARMED); TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmSetESPolicy() nvdimm[%X]" - "failed to read ES register!",TARGETING::get_huid(i_nvdimm)); + "failed to read ES register!",get_huid(i_nvdimm)); break; } - if ((l_data & ES_SUCCESS) != ES_SUCCESS) + if (((l_data & ES_SUCCESS) != ES_SUCCESS) || ((l_data & ES_POLICY_ERROR) == ES_POLICY_ERROR)) { TRACFCOMP(g_trac_nvdimm, EXIT_MRK"NDVIMM HUID[%X], 
nvdimmSetESPolicy() " - "failed!",TARGETING::get_huid(i_nvdimm)); + "failed!",get_huid(i_nvdimm)); /*@ *@errortype *@reasoncode NVDIMM_SET_ES_ERROR @@ -700,28 +1043,28 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm) * NVDIMM is intact *@custdesc NVDIMM encountered error setting the energy source policy */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, - NVDIMM_SET_ES, - NVDIMM_SET_ES_ERROR, - NVDIMM_SET_USER_DATA_1(CHARGE, TARGETING::get_huid(i_nvdimm)), - 0x0, - ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); - - l_err->collectTrace(NVDIMM_COMP_NAME, 1024 ); - - // Failure setting the energy source policy could mean error on the - // battery or even the cabling - l_err->addPartCallout( i_nvdimm, - HWAS::BPM_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH); - l_err->addPartCallout( i_nvdimm, - HWAS::BPM_CABLE_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH); + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_SET_ES, + NVDIMM_SET_ES_ERROR, + NVDIMM_SET_USER_DATA_1(CHARGE, get_huid(i_nvdimm)), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + + // Read relevant regs for trace data + nvdimmTraceRegs(i_nvdimm, l_RegInfo); + nvdimmAddPage4Regs(i_nvdimm,l_err); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); } }while(0); TRACUCOMP(g_trac_nvdimm, EXIT_MRK"NDVIMM HUID[%X], nvdimmSetESPolicy()," - ,TARGETING::get_huid(i_nvdimm)); + ,get_huid(i_nvdimm)); return l_err; } @@ -739,7 +1082,7 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm) errlHndl_t nvdimmChangeArmState(Target *i_nvdimm, bool i_state) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmChangeArmState() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; @@ -753,11 +1096,11 @@ errlHndl_t nvdimmChangeArmState(Target *i_nvdimm, bool i_state) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmChangeArmState() nvdimm[%X] error %s 
nvdimm!!", - TARGETING::get_huid(i_nvdimm), i_state? "arming" : "disarming"); + get_huid(i_nvdimm), i_state? "arming" : "disarming"); } TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmChangeArmState() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); return l_err; } @@ -774,7 +1117,7 @@ errlHndl_t nvdimmChangeArmState(Target *i_nvdimm, bool i_state) errlHndl_t nvdimmValidImage(Target *i_nvdimm, bool &o_imgValid) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmValidImage(): nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; uint8_t l_data = 0x0; @@ -785,7 +1128,7 @@ errlHndl_t nvdimmValidImage(Target *i_nvdimm, bool &o_imgValid) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmValidImage() nvdimm[%X]" - "failed to for image!",TARGETING::get_huid(i_nvdimm) ); + "failed to for image!",get_huid(i_nvdimm) ); } else if(l_data & VALID_IMAGE) { @@ -793,55 +1136,70 @@ errlHndl_t nvdimmValidImage(Target *i_nvdimm, bool &o_imgValid) } TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmValidImage(): nvdimm[%X] ret[%X]", - TARGETING::get_huid(i_nvdimm), l_data); + get_huid(i_nvdimm), l_data); return l_err; } +void maskMbacalfir_eventn(TARGETING::Target* i_nvdimm) +{ + errlHndl_t l_err = nullptr; + TargetHandleList l_mcaList; + uint64_t l_writeData; + uint32_t l_writeAddress; + size_t l_writeSize = sizeof(l_writeData); + + getParentAffinityTargets(l_mcaList, i_nvdimm, CLASS_UNIT, TYPE_MCA); + assert(l_mcaList.size(), "maskMbacalfir_eventn() failed to find parent MCA."); + + l_writeAddress = MBACALFIR_OR_MASK_REG; + l_writeData = MBACALFIR_EVENTN_OR_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, + ERR_MRK "Failed to mask MBACALFIR EventN using address " + "0x%08x on NVDIMM 0x%08X MCA 0x%08X", + l_writeAddress, get_huid(i_nvdimm), get_huid(l_mcaList[0])); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); 
+ } +} + #ifndef __HOSTBOOT_RUNTIME /** * @brief This function handles all the restore related operations. * SRE -> restore -> SRX/RCD/MRS * - * @param[in] i_nvdimmList - list of nvdimms + * @param[in,out] io_nvdimmList - list of nvdimms. Each nvdimm is removed + * from the list after a successful restore. Leftover nvdimm + * is returned to the caller for error handling. * * @param[in] i_mpipl - MPIPL mode * * @return errlHndl_t - Null if successful, otherwise a pointer to * the error log. */ -errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) +errlHndl_t nvdimmRestore(TargetHandleList& io_nvdimmList, uint8_t &i_mpipl) { errlHndl_t l_err = nullptr; - bool l_imgValid; uint8_t l_rstrValid; uint32_t l_poll = 0; + TargetHandleList l_nvdimmList = io_nvdimmList; do { // Put NVDIMM into self-refresh - for (TargetHandleList::iterator it = i_nvdimmList.begin(); - it != i_nvdimmList.end();) + for (TargetHandleList::iterator it = io_nvdimmList.begin(); + it != io_nvdimmList.end();) { - l_err = nvdimmValidImage(*it, l_imgValid); - // No reason to run if we can't figure out - // if there is an image or not - if (l_err) - { - nvdimmSetStatusFlag(*it, NSTD_ERR_NOPRSV); - break; - } + // Default state during boot is unarmed, therefore not preserved + nvdimmSetStatusFlag(*it, NSTD_VAL_DISARMED); - if (!l_imgValid) - { - nvdimmSetStatusFlag(*it, NSTD_VAL_NOPRSV); - i_nvdimmList.erase(it); - continue; - } - - TARGETING::TargetHandleList l_mcaList; - getParentAffinityTargets(l_mcaList, *it, TARGETING::CLASS_UNIT, TARGETING::TYPE_MCA); + TargetHandleList l_mcaList; + getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA); assert(l_mcaList.size(), "nvdimmRestore() failed to find parent MCA."); fapi2::Target<fapi2::TARGET_TYPE_MCA> l_fapi_mca(l_mcaList[0]); @@ -850,21 +1208,40 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) // is de-asserted before kicking off the restore if (i_mpipl) { + TRACFCOMP(g_trac_nvdimm, 
"nvdimmRestore(): in MPIPL"); + + // To avoid PRD error during mpipl need to Mask MBACALFIR EventN + // Note: a regular IPL will already have this masked + maskMbacalfir_eventn(*it); + + // Call init for error checking skipped in the SAVE step + nvdimm_init(*it); + FAPI_INVOKE_HWP(l_err, mss::ddr_resetn, l_fapi_mca, HIGH); if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] i_mpipl[%u] failed to de-assert resetn!", - TARGETING::get_huid(*it), i_mpipl); - - nvdimmSetStatusFlag(*it, NSTD_ERR_NOPRSV); - //@TODO RTC 199645 - add HW callout on dimm target - // If we failed to de-assert reset_n, the dimm is pretty much useless. - // Let's not restore if that happens - // The callout will be added inside the HWP - // Leaving this comment here as a reminder, will remove later + get_huid(*it), i_mpipl); break; } + + // In MPIPL, invalidate the BAR to prevent any traffic from stepping on + // the restore + FAPI_INVOKE_HWP(l_err, mss::nvdimm::change_bar_valid_state, l_fapi_mca, LOW); + + // This should not fail at all (scom read/write). 
If it does, post an informational log + // to leave some breadcrumbs + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] i_mpipl[%u] failed to invalidate BAR!", + get_huid(*it), i_mpipl); + + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + l_err->collectTrace( NVDIMM_COMP_NAME ); + ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID); + } + } // Self-refresh is done at the port level @@ -873,13 +1250,7 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] self_refresh_entry failed!", - TARGETING::get_huid(*it)); - - nvdimmSetStatusFlag(*it, NSTD_ERR_NOPRSV); - //@TODO RTC 199645 - add HW callout on dimm target - // Without SRE the data could be not reliably restored - // The callout will be added inside the HWP - // Leaving this comment here as a reminder, will remove later + get_huid(*it)); break; } it++; @@ -890,21 +1261,14 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) break; } - // Nothing to do. Move on. - if (i_nvdimmList.empty()) - { - break; - } - // Kick off the restore on each nvdimm in the nvdimm list - for (const auto & l_nvdimm : i_nvdimmList) + for (const auto & l_nvdimm : io_nvdimmList) { l_err = nvdimmWriteReg(l_nvdimm, NVDIMM_FUNC_CMD, RESTORE_IMAGE); if (l_err) { - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV); TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X], error initiating restore!!", - TARGETING::get_huid(l_nvdimm)); + get_huid(l_nvdimm)); break; } } @@ -915,7 +1279,7 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) } // Make sure the restore completed - for (const auto & l_nvdimm : i_nvdimmList) + for (const auto & l_nvdimm : io_nvdimmList) { // Since we kicked off the restore on all the modules at once, the restore // should complete on all of the modules in one restore window. 
Use the @@ -923,10 +1287,8 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) l_err = nvdimmPollRestoreDone(l_nvdimm, l_poll); if (l_err) { - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV); TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X], error restoring!", - TARGETING::get_huid(l_nvdimm)); - errlCommit(l_err, NVDIMM_COMP_ID); + get_huid(l_nvdimm)); break; } } @@ -936,22 +1298,23 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) break; } - // Make sure the restore is valid - for (const auto & l_nvdimm : i_nvdimmList) + // Check for restore errors + for (TargetHandleList::iterator it = io_nvdimmList.begin(); + it != io_nvdimmList.end();) { - l_err = nvdimmGetRestoreValid(l_nvdimm, l_rstrValid); + l_err = nvdimmGetRestoreValid(*it, l_rstrValid); if (l_err) { - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV); TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore Target[%X] error validating restore status!", - TARGETING::get_huid(l_nvdimm)); + get_huid(*it)); break; } - if ((l_rstrValid & RSTR_SUCCESS) != RSTR_SUCCESS){ + if ((l_rstrValid & RSTR_ERROR) == RSTR_ERROR) + { - TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X] restoreValid[%d], restore failed!", - TARGETING::get_huid(l_nvdimm), l_rstrValid); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X] restore failed due to errors", + get_huid(*it)); /*@ *@errortype *@reasoncode NVDIMM_RESTORE_FAILED @@ -964,36 +1327,21 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) * restore timeout (Controller error) *@custdesc NVDIMM failed to restore data */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - NVDIMM_RESTORE, - NVDIMM_RESTORE_FAILED, - TARGETING::get_huid(l_nvdimm), - 0x0, - ERRORLOG::ErrlEntry::NO_SW_CALLOUT); - - l_err->collectTrace(NVDIMM_COMP_NAME, 1024 ); - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV); - - // Invalid restore could be due to dram not in self-refresh - // or controller issue. 
Data should not be trusted at this point - l_err->addPartCallout( l_nvdimm, - HWAS::NV_CONTROLLER_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH); + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + NVDIMM_RESTORE, + NVDIMM_RESTORE_FAILED, + get_huid(*it), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT); + nvdimmAddPage4Regs(*it,l_err); + nvdimmAddVendorLog(*it, l_err); break; } - } - - if (l_err) - { - break; - } - // Exit self-refresh - for (const auto & l_nvdimm : i_nvdimmList) - { - - TARGETING::TargetHandleList l_mcaList; - getParentAffinityTargets(l_mcaList, l_nvdimm, TARGETING::CLASS_UNIT, TARGETING::TYPE_MCA); + // Exit self-refresh + TargetHandleList l_mcaList; + getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA); assert(l_mcaList.size(), "nvdimmRestore() failed to find parent MCA."); fapi2::Target<fapi2::TARGET_TYPE_MCA> l_fapi_mca(l_mcaList[0]); @@ -1005,16 +1353,48 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] post_restore_transition failed!", - TARGETING::get_huid(l_nvdimm)); - - // Commit the error from the HWP - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV); + get_huid(*it)); + nvdimmAddPage4Regs(*it,l_err); break; } else { // Restore success! 
- nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_PRSV); + // Remove dimm from list for error handling + it = io_nvdimmList.erase(it); + } + } + + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() HUID[%X] encounrterd an error during restore"); + break; + } + + if (i_mpipl) + { + for (const auto & l_nvdimm : l_nvdimmList) + { + TargetHandleList l_mcaList; + errlHndl_t err = nullptr; + getParentAffinityTargets(l_mcaList, l_nvdimm, CLASS_UNIT, TYPE_MCA); + assert(l_mcaList.size(), "nvdimmRestore() failed to find parent MCA."); + + // Re-validate the BAR after restore + fapi2::Target<fapi2::TARGET_TYPE_MCA> l_fapi_mca(l_mcaList[0]); + FAPI_INVOKE_HWP(err, mss::nvdimm::change_bar_valid_state, l_fapi_mca, HIGH); + + // This should not fail at all (scom read/write). If it does, post an informational log + // to leave some breadcrumbs + if (err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] i_mpipl[%u] failed to invalidate BAR!", + get_huid(l_nvdimm), i_mpipl); + + err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + err->collectTrace( NVDIMM_COMP_NAME ); + ERRORLOG::errlCommit(err, NVDIMM_COMP_ID); + } } } @@ -1027,66 +1407,124 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl) #endif /** - * @brief This function checks the erase status register to make sure - * the last erase completed witout error + * @brief This function checks the status and success of an erase * * @param[in] i_nvdimm - nvdimm target with NV controller + * @param[in] i_statusOnly - check just the status register (not the image) * * @return errlHndl_t - Null if successful, otherwise a pointer to * the error log. 
*/ -errlHndl_t nvdimmCheckEraseSuccess(Target *i_nvdimm) +errlHndl_t nvdimmEraseCheck(Target *i_nvdimm, bool i_statusOnly) { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmCheckEraseSuccess() : nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); - - uint8_t l_data = 0; errlHndl_t l_err = nullptr; + nvdimm_reg_t l_RegInfo; + uint8_t l_data = 0; + bool l_valid = false; - l_err = nvdimmReadReg(i_nvdimm, ERASE_STATUS, l_data); + // Erase happens one module at a time. No need to set any offset on the counter + uint32_t l_poll = 0; + l_err = nvdimmPollEraseDone(i_nvdimm, l_poll); + // Add part callout, currently all erase calls have same callout + // Dump traces to the error log if error exists if (l_err) { - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckEraseSuccess() nvdimm[%X]" - "failed to read erase status reg!",TARGETING::get_huid(i_nvdimm)); + // For both Erase timeout and Erase fail + // Callout nvdimm on high, gard and deconfig + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + + // Collect register data for FFDC Traces + nvdimmTraceRegs ( i_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(i_nvdimm,l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); } - else if ((l_data & ERASE_SUCCESS) != ERASE_SUCCESS) + else { + do + { + // Read Erase Status register + l_err = nvdimmReadReg ( i_nvdimm, ERASE_STATUS, l_data); + if (l_err) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_DISARMED); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm[%X], failed to read erase status", + get_huid(i_nvdimm)); + break; + } - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckEraseSuccess() nvdimm[%X]" - "failed to erase!",TARGETING::get_huid(i_nvdimm)); - /*@ - *@errortype - *@reasoncode NVDIMM_ERASE_FAILED - *@severity ERRORLOG_SEV_PREDICTIVE - *@moduleid NVDIMM_CHECK_ERASE - *@userdata1[0:31] Related ops (0xff = NA) - *@userdata1[32:63] Target Huid - *@userdata2 <UNUSED> - *@devdesc Encountered error erasing 
previously stored data image - * on NVDIMM. Likely due to timeout and/or controller error - *@custdesc NVDIMM error erasing data image - */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, - NVDIMM_CHECK_ERASE, - NVDIMM_ERASE_FAILED, - NVDIMM_SET_USER_DATA_1(ERASE, TARGETING::get_huid(i_nvdimm)), - 0x0, - ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + if (i_statusOnly) + { + // assume image is cleared, do not check + TRACFCOMP(g_trac_nvdimm, "nvdimmEraseCheck() - skipping image check for nvdimm[%X]", + get_huid(i_nvdimm)); + l_valid = false; + } + else + { + // Check for a valid image + l_err = nvdimmValidImage( i_nvdimm, l_valid ); + if (l_err) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_DISARMED); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm[%X] Failed to detect valid image", + get_huid(i_nvdimm)); + break; + } + } - l_err->collectTrace(NVDIMM_COMP_NAME, 1024 ); - errlCommit( l_err, NVDIMM_COMP_ID ); + if ( (l_data & ERASE_ERROR) || l_valid ) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_DISARMED); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm[%X] NVDimm Erase failed due to error (ERASE_STATUS: 0x%02X, Image %s)", + get_huid(i_nvdimm), l_data, l_valid?"not erased":"erased"); + /*@ + *@errortype + *@reasoncode NVDIMM_ERASE_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CHECK_ERASE + *@userdata1[0:31] ERASE_STATUS register + *@userdata1[32:63] Target Huid + *@userdata2 ERASE_ERROR status bit + *@userdata2 Image validity + *@devdesc Encountered error during image erase function + * on NVDIMM. 
Check error register trace for details + *@custdesc NVDIMM error during nvdimm erase + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CHECK_ERASE, + NVDIMM_ERASE_ERROR, + NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)), + NVDIMM_SET_USER_DATA_1(ERASE_ERROR, l_valid), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + break; + } - // Failure to erase could mean internal NV controller error and/or - // HW error on nand flash. NVDIMM will lose persistency if failed to - // erase nand flash - l_err->addPartCallout( i_nvdimm, - HWAS::NV_CONTROLLER_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH); - } + } while(0); - TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmCheckEraseSuccess(): nvdimm[%X] ret[%X]", - TARGETING::get_huid(i_nvdimm), l_data); + if(l_err) + { + // Callout nvdimm on high, gard and deconfig + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + + // Collect register data for FFDC Traces + nvdimmTraceRegs ( i_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(i_nvdimm,l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + } + } return l_err; } @@ -1102,7 +1540,7 @@ errlHndl_t nvdimmCheckEraseSuccess(Target *i_nvdimm) errlHndl_t nvdimmEraseNF(Target *i_nvdimm) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmEraseNF() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; @@ -1112,22 +1550,17 @@ errlHndl_t nvdimmEraseNF(Target *i_nvdimm) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X] error initiating erase!!", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); break; } - // Erase happens one module at a time. 
No need to set any offset on the counter - uint32_t l_poll = 0; - l_err = nvdimmPollEraseDone(i_nvdimm, l_poll); - if (!l_err) - { - l_err = nvdimmCheckEraseSuccess(i_nvdimm); - } + // Poll for success, then check the status and image + l_err = nvdimmEraseCheck(i_nvdimm, false); }while(0); TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmEraseNF() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); return l_err; } @@ -1146,15 +1579,15 @@ errlHndl_t nvdimmEraseNF(Target *i_nvdimm) errlHndl_t nvdimmOpenPage(Target *i_nvdimm, uint8_t i_page) { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmOpenPage nvdimm[%X]", TARGETING::get_huid(i_nvdimm)); + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmOpenPage nvdimm[%X]", get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; bool l_success = false; uint8_t l_data; uint32_t l_poll = 0; uint32_t l_target_timeout_values[6]; - assert(i_nvdimm->tryGetAttr<TARGETING::ATTR_NV_OPS_TIMEOUT_MSEC>(l_target_timeout_values), - "nvdimmOpenPage() HUID[%X], failed reading ATTR_NV_OPS_TIMEOUT_MSEC!", TARGETING::get_huid(i_nvdimm)); + assert(i_nvdimm->tryGetAttr<ATTR_NV_OPS_TIMEOUT_MSEC>(l_target_timeout_values), + "nvdimmOpenPage() HUID[%X], failed reading ATTR_NV_OPS_TIMEOUT_MSEC!", get_huid(i_nvdimm)); uint32_t l_timeout = l_target_timeout_values[PAGE_SWITCH]; @@ -1167,7 +1600,7 @@ errlHndl_t nvdimmOpenPage(Target *i_nvdimm, if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmOpenPage nvdimm[%X]" - "error writing to page change reg", TARGETING::get_huid(i_nvdimm)); + "error writing to page change reg", get_huid(i_nvdimm)); break; } @@ -1200,7 +1633,7 @@ errlHndl_t nvdimmOpenPage(Target *i_nvdimm, if (!l_success && !l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmOpenPage nvdimm[%X] openpage_success[%d]," - "failure to open page!", TARGETING::get_huid(i_nvdimm), static_cast<uint8_t>(l_success)); + "failure to open page!", get_huid(i_nvdimm), static_cast<uint8_t>(l_success)); /*@ *@errortype @@ -1215,25 +1648,28 @@ errlHndl_t nvdimmOpenPage(Target 
*i_nvdimm, *@custdesc Encountered error performing internal operaiton * on NVDIMM */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - NVDIMM_POLL_STATUS, - NVDIMM_STATUS_TIMEOUT, - NVDIMM_SET_USER_DATA_1(PAGE_SWITCH, TARGETING::get_huid(i_nvdimm)), - NVDIMM_SET_USER_DATA_2_TIMEOUT(l_poll, l_timeout), - ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + NVDIMM_OPEN_PAGE, + NVDIMM_OPEN_PAGE_TIMEOUT, + NVDIMM_SET_USER_DATA_1(PAGE_SWITCH, get_huid(i_nvdimm)), + NVDIMM_SET_USER_DATA_2_TIMEOUT(l_poll, l_timeout), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(i_nvdimm, l_err); // Failure to open page most likely means problem with // the NV controller. l_err->addPartCallout( i_nvdimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(i_nvdimm,l_err); } }while(0); TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmOpenPage nvdimm[%X] nvdimmOpenPage.success[%d]," - ,TARGETING::get_huid(i_nvdimm), static_cast<uint8_t>(l_success)); + ,get_huid(i_nvdimm), static_cast<uint8_t>(l_success)); return l_err; } @@ -1250,12 +1686,12 @@ errlHndl_t nvdimmOpenPage(Target *i_nvdimm, errlHndl_t nvdimmGetTimeoutVal(Target* i_nvdimm) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmGetTimeoutVal() HUID[%X]" - ,TARGETING::get_huid(i_nvdimm)); + ,get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; uint8_t l_data = 0; uint32_t timeout_map[6]; - i_nvdimm->tryGetAttr<TARGETING::ATTR_NV_OPS_TIMEOUT_MSEC>(timeout_map); + i_nvdimm->tryGetAttr<ATTR_NV_OPS_TIMEOUT_MSEC>(timeout_map); //Get the 6 main timeout values for (uint8_t i = SAVE; i <= CHARGE; i++) @@ -1282,28 +1718,33 @@ errlHndl_t nvdimmGetTimeoutVal(Target* i_nvdimm) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmGetTimeoutVal() HUID[%X] " - "error reading timeout value for op[%d]!", TARGETING::get_huid(i_nvdimm), i); + "error reading timeout value for op[%d]!", get_huid(i_nvdimm), i); 
break; } //Converting to msec depending on bit 15. 1 = sec, 0 = msec //except for charge. Charge is only in seconds so convert anyway + //Double the timeout values for margins if (timeout_map[i] >= 0x8000 || i == CHARGE){ timeout_map[i] = timeout_map[i] & 0x7FFF; - timeout_map[i] = timeout_map[i] * MS_PER_SEC; + timeout_map[i] = timeout_map[i] * MS_PER_SEC * 2; + } + else + { + timeout_map[i] = timeout_map[i] * 2; } TRACUCOMP(g_trac_nvdimm, "nvdimmGetTimeoutVal() HUID[%X], timeout_idx[%d], timeout_ms[%d]" - ,TARGETING::get_huid(i_nvdimm), timeoutInfoTable[i].idx, timeout_map[i]); + ,get_huid(i_nvdimm), timeoutInfoTable[i].idx, timeout_map[i]); } if (!l_err) { - i_nvdimm->setAttr<TARGETING::ATTR_NV_OPS_TIMEOUT_MSEC>(timeout_map); + i_nvdimm->setAttr<ATTR_NV_OPS_TIMEOUT_MSEC>(timeout_map); } TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmGetTimeoutVal() HUID[%X]" - ,TARGETING::get_huid(i_nvdimm)); + ,get_huid(i_nvdimm)); return l_err; } @@ -1327,8 +1768,8 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList) for (TargetHandleList::iterator it = i_nvdimmList.begin(); it != i_nvdimmList.end();) { - TARGETING::TargetHandleList l_mcaList; - getParentAffinityTargets(l_mcaList, *it, TARGETING::CLASS_UNIT, TARGETING::TYPE_MCA); + TargetHandleList l_mcaList; + getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA); assert(l_mcaList.size(), "nvdimmEpowSetup() failed to find parent MCA."); fapi2::Target<fapi2::TARGET_TYPE_MCA> l_fapi_mca(l_mcaList[0]); @@ -1340,9 +1781,10 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmEpowSetup() HUID[%X] failed to setup epow!", - TARGETING::get_huid(*it)); + get_huid(*it)); - nvdimmSetStatusFlag(*it, NSTD_ERR_NOPRSV); + nvdimmSetStatusFlag(*it, NSTD_VAL_SR_FAILED); + nvdimmAddPage4Regs(*it,l_err); break; } it++; @@ -1354,6 +1796,7 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList) return l_err; } + /** * @brief Entry function to NVDIMM restore * - Restore 
image from NVDIMM NAND flash to DRAM @@ -1365,31 +1808,21 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList) void nvdimm_restore(TargetHandleList &i_nvdimmList) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_restore()"); + errlHndl_t l_err = nullptr; + bool l_valid = false; + bool l_continue = true; TARGETING::Target* l_sys = nullptr; TARGETING::targetService().getTopLevelTarget( l_sys ); assert(l_sys, "nvdimm_restore: no TopLevelTarget"); uint8_t l_mpipl = l_sys->getAttr<ATTR_IS_MPIPL_HB>(); + nvdimm_reg_t l_RegInfo = nvdimm_reg_t(); + TargetHandleList l_nvdimm_restore_list = i_nvdimmList; + uint8_t l_rstrValid; do { - // Set the energy policy to device-managed - // Don't think this is needed for the supercaps to start charging - // but do it anyway to get the charging going - for (const auto & l_nvdimm : i_nvdimmList) - { - l_err = nvdimmSetESPolicy(l_nvdimm); - if (l_err) - { - // Failing this is an indication of power pack issue. - // This will prevent future backup, but let's continue - // since we can still restore the data if there is any - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOBKUP); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() - Failing nvdimmSetESPolicy()"); - errlCommit( l_err, NVDIMM_COMP_ID ); - } - } - + // Check MPIPL case first to make sure any on-going backup is complete if (l_mpipl) { // During MPIPL, make sure any in-progress save is completed before proceeding @@ -1401,41 +1834,118 @@ void nvdimm_restore(TargetHandleList &i_nvdimmList) if (l_err) { - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV); + nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERASED); TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X], error backing up the DRAM!", - TARGETING::get_huid(l_nvdimm)); + get_huid(l_nvdimm)); errlCommit(l_err, NVDIMM_COMP_ID); break; } } } + // Compile a list of nvdimms with valid image + // TODO: Reach out to RAS on how to handle odd number of nvdimms + // since we always operate in pairs + for (TargetHandleList::iterator it = 
l_nvdimm_restore_list.begin(); + it != l_nvdimm_restore_list.end();) + { + // Check for a valid image + l_err = nvdimmValidImage( *it, l_valid ); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] Failed to detect valid image", get_huid(*it)); + errlCommit(l_err, NVDIMM_COMP_ID); + } + + // Remove it from the restore list if there is no valid image + if (!l_valid) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] No valid image discovered", get_huid(*it)); + // Set ATTR NV STATUS FLAG to Erased + nvdimmSetStatusFlag(*it, NSTD_VAL_ERASED); + it = l_nvdimm_restore_list.erase(it); + + } + else + { + it++; + } + } + + // Exit if there is nothing to restore + if (l_nvdimm_restore_list.empty()) + { + break; + } + // Start the restore - l_err = nvdimmRestore(i_nvdimmList, l_mpipl); + l_err = nvdimmRestore(l_nvdimm_restore_list, l_mpipl); + // Check if restore completed successfully if (l_err) { + const auto l_nvdimm = l_nvdimm_restore_list.front(); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() - Failing nvdimmRestore()"); - errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_SR_FAILED); + + // Invalid restore could be due to dram not in self-refresh + // or controller issue. 
Data should not be trusted at this point + l_err->addHwCallout( l_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + + // Collect register data for FFDC Traces + nvdimmTraceRegs ( l_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(l_nvdimm,l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); break; } - // Make sure the energy source is fully charged before erasing the images - // Doing this on all the nvdimms since the ones w/o image will need - // to be fully charged before arming the trigger - uint32_t l_poll = 0; + // Check health status registers and exit if required for (const auto & l_nvdimm : i_nvdimmList) { - l_err = nvdimmPollESChargeStatus(l_nvdimm, l_poll); + // Post restore health check. l_continue gets set per the health check logic + // and used later to determine if boot shall continue on error condition + l_err = nvdimmHealthStatusCheck( l_nvdimm, HEALTH_RESTORE, l_continue ); - if (l_err){ - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOBKUP); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm)); errlCommit( l_err, NVDIMM_COMP_ID ); + if (!l_continue) + { + break; + } } + + // Make sure the restore is valid + l_err = nvdimmGetRestoreValid(l_nvdimm, l_rstrValid); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore Target[%X] error validating restore status!", + get_huid(l_nvdimm)); + break; + } + + if ((l_rstrValid & RSTR_SUCCESS) == RSTR_SUCCESS) + { + // Restore success! + nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_RESTORED); + } + } }while(0); + if (l_err) + { + errlCommit(l_err, NVDIMM_COMP_ID); + } + // At the end, pre-load CCS with commands for EPOW. This will stage the CCS // with the require commands to trigger the save on NVDIMMs. The actual // triggering will be done by OCC when EPOW is detected. 
@@ -1455,6 +1965,7 @@ void nvdimm_restore(TargetHandleList &i_nvdimmList) * - Checks for ready state * - Gathers timeout values * - Waits for the ongoing backup to complete + * - Unlocks encryption * - Disarms the trigger for draminit * * @param[in] i_nvdimm - nvdimm target @@ -1463,66 +1974,3903 @@ void nvdimm_restore(TargetHandleList &i_nvdimmList) void nvdimm_init(Target *i_nvdimm) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_init() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); errlHndl_t l_err = nullptr; + bool l_continue = true; + uint8_t l_data = 0; + uint8_t l_failinfo0 = 0; + uint8_t l_failinfo1 = 0; + nvdimm_reg_t l_RegInfo; + uint32_t l_poll = 0; do { - l_err = nvdimmReady(i_nvdimm); + // Force a factory reset if told to via attribute override + // This will allow us to recover from bad images, lost keys, etc + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "nvdimm_init: no TopLevelTarget"); + if( l_sys->getAttr<ATTR_FORCE_NVDIMM_RESET>() ) + { + l_err = nvdimm_factory_reset(i_nvdimm); + if (l_err) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], factory reset failed", + get_huid(i_nvdimm)); + errlCommit(l_err, NVDIMM_COMP_ID); + } + } + // Set ATTR_NV_STATUS_FLAG to default disarmed state + l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED); if (l_err) { nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_int() nvdimm[%X], controller not ready", - TARGETING::get_huid(i_nvdimm)); errlCommit(l_err, NVDIMM_COMP_ID); + } + + // Check if the nvdimm ready status + l_err = nvdimmReady(i_nvdimm); + + if (l_err) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], controller not ready", + get_huid(i_nvdimm)); break; } + // Check if the firmware slot is 0 + l_err = nvdimmGetRunningSlot(i_nvdimm, l_data); + if (l_err) + { + 
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], failed to read slot info", + get_huid(i_nvdimm)); + break; + } + + if (l_data == 0) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], running on fw slot 0", + get_huid(i_nvdimm)); + /*@ + *@errortype + *@reasoncode NVDIMM_INVALID_FW_SLOT + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CHECK_FW_SLOT + *@userdata1[0:31] Slot running + *@userdata1[32:63] Target Huid + *@userdata2 <UNUSED> + *@devdesc Encountered error when checking the firmware slot running + * on NVDIMM. Firmware is running on slot 0 instead of 1 + *@custdesc NVDIMM incorrect firmware slot + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CHECK_FW_SLOT, + NVDIMM_INVALID_FW_SLOT, + NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Add callout of nvdimm with no deconfig/gard + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + errlCommit(l_err, NVDIMM_COMP_ID); + } + // Get the timeout values for the major ops at init l_err = nvdimmGetTimeoutVal(i_nvdimm); if (l_err) { nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_int() nvdimm[%X], error retrieving timeout values", - TARGETING::get_huid(i_nvdimm)); - errlCommit(l_err, NVDIMM_COMP_ID); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], error retrieving timeout values", + get_huid(i_nvdimm)); break; } - //Check save progress - uint32_t l_poll = 0; - l_err = nvdimmPollBackupDone(i_nvdimm, l_poll); + // Check for Erase in progress and verify good status + l_err = nvdimmEraseCheck(i_nvdimm, true); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], error checking erase status", + get_huid(i_nvdimm)); + break; + } + // Check 
NO_RESET_N bit for power loss without save + l_err = nvdimmReadReg ( i_nvdimm, CSAVE_FAIL_INFO1, l_data); if (l_err) { - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOPRSV); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_int() nvdimm[%X], error backing up the DRAM!", - TARGETING::get_huid(i_nvdimm)); - errlCommit(l_err, NVDIMM_COMP_ID); break; } + else if ((l_data & NO_RESET_N) == NO_RESET_N) + { + // Set ATTR_NV_STATUS_FLAG to partial working as data may persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmInit() nvdimm[%X]" + "failed to save due to power loss!",get_huid(i_nvdimm)); + /*@ + *@errortype + *@reasoncode NVDIMM_POWER_SAVE_FAILURE + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CHECK_RESETN + *@userdata1[0:31] Related ops (0xff = NA) + *@userdata1[32:63] Target Huid + *@userdata2 <UNUSED> + *@devdesc NO_RESET_N: The NVDIMM experienced a power loss, but no CSAVE + * was triggered since the NVDIMM did not detect an asserted + * RESET_N. If there is a prior predicitve log for OCC in safe + * mode, than this would be the reason for NO_RESET_N. Otherwise + * there could be a problem with the RESET_N signal between proc + * and NVDIMM. + *@custdesc NVDIMM error erasing data image + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CHECK_RESETN, + NVDIMM_POWER_SAVE_FAILURE, + NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // Failure to erase could mean internal NV controller error and/or + // HW error on nand flash. 
NVDIMM will lose persistency if failed to + // erase nand flash + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Collect register data for FFDC Traces + nvdimmTraceRegs ( i_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(i_nvdimm,l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); - // Disarm the ddr_resetn here in case it came in armed. When the nvdimm is - // armed the reset_n is masked off from the host, meaning the drams won't - // be able to get reset properly later, causing training to fail. - l_err = nvdimmChangeArmState(i_nvdimm, DISARM_TRIGGER); + errlCommit(l_err, NVDIMM_COMP_ID); + } + else + { + // Check save progress + l_err = nvdimmPollBackupDone(i_nvdimm, l_poll); + if (l_err) + { + // May have to move the error handling to the caller + // as different op could have different error severity + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_int() nvdimm[%X], error backing up the DRAM!", + get_huid(i_nvdimm)); + break; + } + } + // Check CSAVE FAIL INFO registers for fail errors + l_err = nvdimmReadReg( i_nvdimm, CSAVE_FAIL_INFO0, l_failinfo0 ); + if (l_err) + { + break; + } + l_err = nvdimmReadReg ( i_nvdimm, CSAVE_FAIL_INFO1, l_failinfo1 ); + if (l_err) + { + break; + } + // Apply mask for relevant 1:6 bits to failinfo1 + l_failinfo1 &= CSAVE_FAIL_BITS_MASK; + + // Check CSAVE_STATUS Register + l_err = nvdimmReadReg( i_nvdimm, CSAVE_STATUS, l_data ); if (l_err) { - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOPRSV); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], error disarming the nvdimm!", - TARGETING::get_huid(i_nvdimm)); - errlCommit(l_err, NVDIMM_COMP_ID); break; } + else if ((l_data == SAVE_ERROR) && ((l_failinfo0 != ZERO) || (l_failinfo1 != ZERO))) + { + /*@ + *@errortype + *@reasoncode NVDIMM_CSAVE_ERROR + *@severity 
ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CHECK_CSAVE + *@userdata1[0:31] Related ops (0xff = NA) + *@userdata1[32:63] Target Huid + *@userdata2 <UNUSED> + *@devdesc Encountered error saving during catastrophic save + * on NVDIMM. Check error register trace for details + *@custdesc NVDIMM error during Catastrophic Save + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CHECK_CSAVE, + NVDIMM_CSAVE_ERROR, + NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Collect register data for FFDC Traces + nvdimmTraceRegs ( i_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(i_nvdimm,l_err); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + + // Check if the image is still valid + if ( l_RegInfo.CSave_Info != VALID_IMAGE ) + { + // Callout and gard dimm if image is not valid + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + break; + } + else + { + // Callout dimm without gard if image is valid + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Set ATTR_NV_STATUS_FLAG to partial working as data may persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + errlCommit(l_err, NVDIMM_COMP_ID); + } + } + + // Check Health Status Registers + l_err = nvdimmHealthStatusCheck(i_nvdimm, HEALTH_SAVE, l_continue); + if(!l_continue) + { + break; + } + + // Unlock encryption if enabled + TargetHandleList l_nvdimmTargetList; + l_nvdimmTargetList.push_back(i_nvdimm); + NVDIMM::nvdimm_encrypt_unlock(l_nvdimmTargetList); }while(0); TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_init() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + get_huid(i_nvdimm)); + + if (l_err) + { + l_err->collectTrace( NVDIMM_COMP_NAME ); + errlCommit(l_err, NVDIMM_COMP_ID); + } } + + +void 
nvdimm_thresholds(TARGETING::TargetHandleList &i_nvdimmList) +{ + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_thresholds()"); + + errlHndl_t l_err = nullptr; + + for (const auto & l_nvdimm : i_nvdimmList) + { + // ES_LIFETIME_WARNING_THRESHOLD + l_err = nvdimmWriteReg(l_nvdimm, + ES_LIFETIME_WARNING_THRESHOLD, + THRESHOLD_ES_LIFETIME); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, + ERR_MRK"nvdimm_thresholds() nvdimm[%X] " + "error setting ES_LIFETIME_WARNING_THRESHOLD", + get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // NVM_LIFETIME_WARNING_THRESHOLD + l_err = nvdimmWriteReg(l_nvdimm, + NVM_LIFETIME_WARNING_THRESHOLD, + THRESHOLD_NVM_LIFETIME); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, + ERR_MRK"nvdimm_thresholds() nvdimm[%X] " + "error setting NVM_LIFETIME_WARNING_THRESHOLD", + get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // ES_TEMP_WARNING_HIGH_THRESHOLD1 + l_err = nvdimmWriteReg(l_nvdimm, + ES_TEMP_WARNING_HIGH_THRESHOLD1, + THRESHOLD_ES_TEMP_HIGH_1); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, + ERR_MRK"nvdimm_thresholds() nvdimm[%X] " + "error setting ES_TEMP_WARNING_HIGH_THRESHOLD1", + get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // ES_TEMP_WARNING_HIGH_THRESHOLD0 + l_err = nvdimmWriteReg(l_nvdimm, + ES_TEMP_WARNING_HIGH_THRESHOLD0, + THRESHOLD_ES_TEMP_HIGH_0); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, + ERR_MRK"nvdimm_thresholds() nvdimm[%X] " + "error setting ES_TEMP_WARNING_HIGH_THRESHOLD0", + get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // ES_TEMP_WARNING_LOW_THRESHOLD1 + l_err = nvdimmWriteReg(l_nvdimm, + ES_TEMP_WARNING_LOW_THRESHOLD1, + THRESHOLD_ES_TEMP_LOW_1); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, + ERR_MRK"nvdimm_thresholds() nvdimm[%X] " + "error setting ES_TEMP_WARNING_LOW_THRESHOLD1", + get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // ES_TEMP_WARNING_LOW_THRESHOLD0 + l_err = nvdimmWriteReg(l_nvdimm, + 
ES_TEMP_WARNING_LOW_THRESHOLD0, + THRESHOLD_ES_TEMP_LOW_0); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, + ERR_MRK"nvdimm_thresholds() nvdimm[%X] " + "error setting ES_TEMP_WARNING_LOW_THRESHOLD0", + get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + } + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_thresholds()"); +} + + +errlHndl_t nvdimm_getRandom(uint8_t* o_genData) +{ + errlHndl_t l_err = nullptr; + uint8_t l_xtraData[ENC_KEY_SIZE] = {0}; + + do + { + // Get a pointer to the TPM + Target* l_tpm = nullptr; + l_err = nvdimm_getTPM(l_tpm); + if (l_err) + { + break; + } + + // Get a random number from the TPM + l_err = TRUSTEDBOOT::GetRandom(l_tpm, ENC_KEY_SIZE, o_genData); + if (l_err) + { + break; + } + + // Validate and update the random number + // Retry if more randomness required + do + { + //Get replacement data + l_err = TRUSTEDBOOT::GetRandom(l_tpm, ENC_KEY_SIZE, l_xtraData); + if (l_err) + { + break; + } + + }while (nvdimm_keyifyRandomNumber(o_genData, l_xtraData)); + + } while(0); + + return l_err; +} + + +errlHndl_t nvdimm_getTPM(Target*& o_tpm) +{ + errlHndl_t l_err = nullptr; + + do + { + // Get all functional TPMs + TargetHandleList l_tpmList; + TRUSTEDBOOT::getTPMs(l_tpmList, + TRUSTEDBOOT::TPM_FILTER::ALL_FUNCTIONAL); + + if (l_tpmList.size()) + { + o_tpm = l_tpmList[0]; + break; + } + + // No TPMs, generate error + TRACFCOMP(g_trac_nvdimm,ERR_MRK"nvdimm_getTPM() No functional TPMs found"); + + /*@ + *@errortype + *@reasoncode NVDIMM_TPM_NOT_FOUND + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_GET_TPM + *@devdesc Functional TPM required to generate encryption keys + *@custdesc NVDIMM error generating encryption keys + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_GET_TPM, + NVDIMM_TPM_NOT_FOUND, + 0x0, + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + + // Get all TPMs + TRUSTEDBOOT::getTPMs(l_tpmList, + TRUSTEDBOOT::TPM_FILTER::ALL_IN_BLUEPRINT); + if 
(l_tpmList.size() == 0) + { + // No TPMs, we probably have nvdimms enabled + // when they should not be + l_err->addProcedureCallout( + HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_HIGH); + } + else + { + // If a TPM exists it must be deconfigured + l_err->addProcedureCallout( + HWAS::EPUB_PRC_FIND_DECONFIGURED_PART, + HWAS::SRCI_PRIORITY_HIGH); + l_err->addProcedureCallout( + HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_MED); + } + + }while(0); + + // Functional TPM not found + return l_err; +} + + +#endif + + +/** + * @brief Force a factory reset of the NV logic and flash + * + * @param[in] i_nvdimm - NVDIMM Target + */ +errlHndl_t nvdimm_factory_reset(Target *i_nvdimm) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_factory_reset() nvdimm[%X]", + get_huid(i_nvdimm)); + errlHndl_t l_err = nullptr; + + do + { + // Send the reset command + l_err = nvdimmWriteReg(i_nvdimm, NVDIMM_FUNC_CMD, FACTORY_DEFAULT); + if( l_err ) + { + break; + } + + // Poll 2 minutes for completion + // We could get the timeout value from the dimm but since we're + // doing a hard reset anyway I just want to use a big number that + // can handle any lies that the controller might tell us. 
+ uint8_t l_data = 0; + constexpr uint64_t MAX_POLL_SECONDS = 120; + uint64_t poll = 0; + for( poll = 0; poll < MAX_POLL_SECONDS; poll++ ) + { + l_err = nvdimmReadReg(i_nvdimm, NVDIMM_CMD_STATUS0, l_data); + if( l_err ) + { + break; + } + + if( l_data != FACTORY_RESET_IN_PROGRESS ) + { + break; + } + +#ifndef __HOSTBOOT_RUNTIME + // kick the watchdog since this can take awhile + INITSERVICE::sendProgressCode(); #endif + // sleep 1 second + nanosleep(1, 0); + } + if( l_err ) { break; } + + // Make an error if it never finished + if( poll >= MAX_POLL_SECONDS ) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_factory_reset() nvdimm[%X] - factory reset never completed[%d]", + get_huid(i_nvdimm), l_data); + /*@ + *@errortype + *@reasoncode NVDIMM_NOT_READY + *@severity ERRORLOG_SEV_UNRECOVERABLE + *@moduleid NVDIMM_FACTORY_RESET + *@userdata1[0:31] Ret value from ready register + *@userdata1[32:63] Target Huid + *@userdata2 Number of seconds waited + *@devdesc NVDIMM factory reset never completed + *@custdesc NVDIMM still in reset + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + NVDIMM_FACTORY_RESET, + NVDIMM_NOT_READY, + NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)), + MAX_POLL_SECONDS, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // If nvdimm is not ready for access by now, this is + // a failing indication on the NV controller + l_err->addPartCallout( i_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(i_nvdimm,l_err); + } + } while(0); + + return l_err; +} + + +bool nvdimm_encrypt_unlock(TargetHandleList &i_nvdimmList) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_encrypt_unlock()"); + errlHndl_t l_err = nullptr; + bool l_success = true; + + do + { + // Do not check ATTR_NVDIMM_ENCRYPTION_ENABLE + // The attribute could have been reset by flashing the FSP + // Unlock if the keys are valid and NVDIMM hw encryption 
is enabled + + // Get the sys pointer, attribute keys are system level + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "nvdimm_encrypt_unlock() no TopLevelTarget"); + + // Get the FW key attributes + auto l_attrKeysFw = + l_sys->getAttrAsStdArr<ATTR_NVDIMM_ENCRYPTION_KEYS_FW>(); + + // Cast to key data struct type for easy access to each key + nvdimmKeyData_t* l_keysFw = + reinterpret_cast<nvdimmKeyData_t*>(&l_attrKeysFw); + + // Check encryption unlock for all nvdimms + for (const auto & l_nvdimm : i_nvdimmList) + { + // Get encryption state in the config/status reg + encryption_config_status_t l_encStatus = {0}; + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_encrypt_unlock() nvdimm[%X] error reading ENCRYPTION_CONFIG_STATUS",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Already unlocked or not enabled then exit + if (l_encStatus.encryption_unlocked || + !l_encStatus.encryption_enabled) + { + break; + } + + // Check for valid key attribute data + l_err = nvdimm_checkValidAttrKeys(l_keysFw); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + break; + } + + // Else encryption is enabled but needs unlock + TRACFCOMP(g_trac_nvdimm, "nvdimm_encrypt_unlock() nvdimm[%X] enabled, unlocking...",get_huid(l_nvdimm)); + + // Set the Unlock Access Key Reg + l_err = nvdimm_setKeyReg(l_nvdimm, + l_keysFw->ak, + ENCRYPTION_ACCESS_KEY_UNLOCK, + ENCRYPTION_ACCESS_KEY_VERIFY, + false); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Verify encryption is unlocked + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, 
ERR_MRK"nvdimm_encrypt_unlock() nvdimm[%X] error reading ENCRYPTION_CONFIG_STATUS after unlock",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + if (!l_encStatus.encryption_unlocked) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_encrypt_unlock() nvdimm[%X] encryption unlock failed, expected ENCRYPTION_CONFIG_STATUS=0x%.02X, expected=0x1F ",get_huid(l_nvdimm),l_encStatus.whole); + /*@ + *@errortype + *@reasoncode NVDIMM_ENCRYPTION_UNLOCK_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_ENCRYPT_UNLOCK + *@userdata1 NVDIMM HUID + *@userdata2 ENCRYPTION_CONFIG_STATUS + *@devdesc NVDIMM failed to unlock encryption + *@custdesc NVDIMM encryption error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_ENCRYPT_UNLOCK, + NVDIMM_ENCRYPTION_UNLOCK_FAILED, + get_huid(l_nvdimm), + l_encStatus.whole, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); + l_err->addPartCallout( l_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + l_err->addHwCallout( l_nvdimm, + HWAS::SRCI_PRIORITY_MED, + HWAS::DELAYED_DECONFIG, + HWAS::GARD_NULL ); + + nvdimmAddPage4Regs(l_nvdimm,l_err); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + } + else + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_encrypt_unlock() nvdimm[%X] encryption is unlocked 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + } + } + }while(0); + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_encrypt_unlock()"); + return l_success; +} + + +void nvdimmSetEncryptionError(Target *i_nvdimm) +{ + ATTR_NVDIMM_ARMED_type l_armed_state = {}; + l_armed_state = i_nvdimm->getAttr<ATTR_NVDIMM_ARMED>(); + + l_armed_state.encryption_error_detected = 1; + + i_nvdimm->setAttr<ATTR_NVDIMM_ARMED>(l_armed_state); +} + + +bool nvdimm_keyifyRandomNumber(uint8_t* o_genData, uint8_t* 
i_xtraData) +{ + bool l_failed = false; + uint32_t l_xtraByte = 0; + + for (uint32_t l_byte = 0; l_byte < ENC_KEY_SIZE; l_byte++) + { + if ((o_genData[l_byte] != KEY_TERMINATE_BYTE) && + (o_genData[l_byte] != KEY_ABORT_BYTE)) + { + // This byte is valid + continue; + } + + // This byte is not valid, replace it + // Find a valid byte in the replacement data + while ((i_xtraData[l_xtraByte] == KEY_TERMINATE_BYTE) || + (i_xtraData[l_xtraByte] == KEY_ABORT_BYTE)) + { + l_xtraByte++; + + if (l_xtraByte == ENC_KEY_SIZE) + { + l_failed = true; + break; + } + } + + if (l_failed) + { + break; + } + + // Replace the invalid byte with the valid extra byte + o_genData[l_byte] = i_xtraData[l_xtraByte]; + } + + return l_failed; +} + + +bool nvdimm_validRandomNumber(uint8_t* i_genData) +{ + bool l_valid = true; + for (uint32_t l_byte = 0; l_byte < ENC_KEY_SIZE; l_byte++) + { + if ((i_genData[l_byte] == KEY_TERMINATE_BYTE) || + (i_genData[l_byte] == KEY_ABORT_BYTE)) + { + l_valid = false; + break; + } + } + return l_valid; +} + + +errlHndl_t nvdimm_checkValidAttrKeys( nvdimmKeyData_t* i_attrData ) +{ + errlHndl_t l_err = nullptr; + bool l_valid = false; + + do + { + l_valid = nvdimm_validRandomNumber(i_attrData->rs); + if (!l_valid) + { + break; + } + l_valid = nvdimm_validRandomNumber(i_attrData->ek); + if (!l_valid) + { + break; + } + l_valid = nvdimm_validRandomNumber(i_attrData->ak); + if (!l_valid) + { + break; + } + }while(0); + + if (!l_valid) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_checkValidAttrKeys() ATTR_NVDIMM_ENCRYPTION_KEYS_FW contains invalid data"); + /*@ + *@errortype + *@reasoncode NVDIMM_ENCRYPTION_INVALID_ATTRIBUTE + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CHECK_VALID_ATTR_DATA + *@devdesc ATTR_NVDIMM_ENCRYPTION_KEYS_FW has invalid data + *@custdesc NVDIMM encryption error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CHECK_VALID_ATTR_DATA, + NVDIMM_ENCRYPTION_INVALID_ATTRIBUTE, + 
ERRORLOG::ErrlEntry::ADD_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + } + + return l_err; +} + + +errlHndl_t nvdimm_handleConflictingKeys( + ATTR_NVDIMM_ENCRYPTION_KEYS_FW_typeStdArr& i_attrKeysFw, + ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR_typeStdArr& i_attrKeysAnchor) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_handleConflictingKeys()"); + errlHndl_t l_err = nullptr; + bool l_validKeyFound = false; + + // Recast to key data type to simplify parsing + nvdimmKeyData_t* l_keysFw = + reinterpret_cast<nvdimmKeyData_t*>(&i_attrKeysFw); + nvdimmKeyData_t* l_keysAnchor = + reinterpret_cast<nvdimmKeyData_t*>(&i_attrKeysAnchor); + + // Get the nvdimm target pointers + TargetHandleList l_nvdimmTargetList; + nvdimm_getNvdimmList(l_nvdimmTargetList); + for (const auto & l_nvdimm : l_nvdimmTargetList) + { + // Check encryption state in the config/status reg + encryption_config_status_t l_encStatus = {0}; + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_handleConflictingKeys() nvdimm[%X] error reading ENCRYPTION_CONFIG_STATUS",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + continue; + } + + // Encryption is not enabled + // Keys are not in use so could use either set of keys + // Use the ANCHOR card keys + if (!l_encStatus.encryption_enabled) + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_handleConflictingKeys() nvdimm[%X] copying ANCHOR keys to FW",get_huid(l_nvdimm)); + l_validKeyFound = true; + set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW(i_attrKeysAnchor); + continue; + } + + // Encryption is enabled, test the keys + // Write the EK test reg with the FW attr value + l_err = nvdimm_setKeyReg(l_nvdimm, + l_keysFw->ek, + ENCRYPTION_ERASE_KEY_TEST, + ENCRYPTION_ERASE_KEY_TEST_VERIFY, + false); + if (l_err) + { + break; + } + + // Check for erase key valid in the validation reg + encryption_key_validation_t l_keyValid = {0}; + l_err = 
nvdimmReadReg(l_nvdimm, + ENCRYPTION_KEY_VALIDATION, + l_keyValid.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_handleConflictingKeys() nvdimm[%X] error reading ENCRYPTION_KEY_VALIDATION",get_huid(l_nvdimm)); + break; + } + if (l_keyValid.erase_key_valid) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_handleConflictingKeys() nvdimm[%X] ATTR_NVDIMM_ENCRYPTION_KEYS_FW valid",get_huid(l_nvdimm)); + l_validKeyFound = true; + // Re-write the FW keys, this will also update the ANCHOR keys + set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW(i_attrKeysFw); + break; + } + + // Write the EK test reg with the Anchor attr value + l_err = nvdimm_setKeyReg(l_nvdimm, + l_keysAnchor->ek, + ENCRYPTION_ERASE_KEY_TEST, + ENCRYPTION_ERASE_KEY_TEST_VERIFY, + false); + if (l_err) + { + break; + } + + // Check for erase key valid in the validation reg + l_keyValid.whole = 0; + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_KEY_VALIDATION, + l_keyValid.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_handleConflictingKeys() nvdimm[%X] error reading ENCRYPTION_KEY_VALIDATION",get_huid(l_nvdimm)); + break; + } + if (l_keyValid.erase_key_valid) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_handleConflictingKeys() nvdimm[%X] ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR valid",get_huid(l_nvdimm)); + l_validKeyFound = true; + // Copy anchor attr value to FW attribute + set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW(i_attrKeysAnchor); + + break; + } + } + + if (!l_validKeyFound) + { + // Neither key attribute is valid + TRACFCOMP(g_trac_nvdimm,ERR_MRK"nvdimm_handleConflictingKeys() ATTR_NVDIMM_ENCRYPTION_KEYS_FW and ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR invalid."); + /*@ + *@errortype + *@reasoncode NVDIMM_ENCRYPTION_KEY_ATTRS_INVALID + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_HANDLE_CONFLICTING_KEYS + *@devdesc NVDIMM encryption key attributes invalid + *@custdesc NVDIMM encryption error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + 
NVDIMM_HANDLE_CONFLICTING_KEYS, + NVDIMM_ENCRYPTION_KEY_ATTRS_INVALID, + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + } + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_handleConflictingKeys()"); + return l_err; +} + + +void nvdimm_getNvdimmList(TargetHandleList &o_nvdimmTargetList) +{ + // Check for any NVDIMMs after the mss_power_cleanup + TargetHandleList l_dimmTargetList; + getAllLogicalCards(l_dimmTargetList, TYPE_DIMM); + + // Walk the dimm list and collect all the nvdimm targets + for (auto const l_dimm : l_dimmTargetList) + { + if (isNVDIMM(l_dimm)) + { + o_nvdimmTargetList.push_back(l_dimm); + } + } +} + + +bool nvdimm_gen_keys(void) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_gen_keys()"); + errlHndl_t l_err = nullptr; + bool l_success = true; + + do + { + // Determine if key generation required + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "nvdimm_gen_keys: no TopLevelTarget"); + + // Key size must be less that max TPM random generator size + static_assert(ENC_KEY_SIZE <= MAX_TPM_SIZE, + "nvdimm_gen_keys() ENC_KEY_SIZE is greater than MAX_TPM_SIZE"); + + // Key attributes should be same size + static_assert( sizeof(ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR_type) == + sizeof(ATTR_NVDIMM_ENCRYPTION_KEYS_FW_type), + "nvdimm_gen_keys() size of ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR_type does not match ATTR_NVDIMM_ENCRYPTION_KEYS_FW_type"); + + // Get the key attributes + auto l_attrKeysFw = + l_sys->getAttrAsStdArr<ATTR_NVDIMM_ENCRYPTION_KEYS_FW>(); + auto l_attrKeysAn = + l_sys->getAttrAsStdArr<ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR>(); + + // Check the attribute sizes + static_assert(sizeof(l_attrKeysFw) == (NUM_KEYS_IN_ATTR * ENC_KEY_SIZE), + "nvdimm_gen_keys() Size of ATTR_NVDIMM_ENCRYPTION_KEYS_FW does not match NUM_KEYS_IN_ATTR * ENC_KEY_SIZE"); + static_assert(sizeof(l_attrKeysAn) == (NUM_KEYS_IN_ATTR * ENC_KEY_SIZE), + "nvdimm_gen_keys() Size of ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR does 
not match NUM_KEYS_IN_ATTR * ENC_KEY_SIZE"); + + // Compare attributes to zero + std::array<uint8_t,sizeof(l_attrKeysFw)> l_zero = {0}; + bool l_fwZero = (l_attrKeysFw == l_zero); + bool l_anZero = (l_attrKeysAn == l_zero); + + // Compare the attribute values + if (!l_fwZero && !l_anZero) + { + if (l_attrKeysFw != l_attrKeysAn) + { + // Handle conflicting keys + TRACFCOMP(g_trac_nvdimm, "nvdimm_gen_keys() ATTR_NVDIMM_ENCRYPTION_KEYS_FW != ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR"); + l_err = nvdimm_handleConflictingKeys(l_attrKeysFw,l_attrKeysAn); + } + else + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_gen_keys() ATTR_NVDIMM_ENCRYPTION_KEYS_FW == ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR"); + } + break; + } + else if (!l_fwZero && l_anZero) + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_gen_keys() ATTR_NVDIMM_ENCRYPTION_KEYS_FW != 0 and ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR = 0"); + break; + } + else if (l_fwZero && !l_anZero) + { + // Set FW attr = Anchor attr + TRACFCOMP(g_trac_nvdimm, "nvdimm_gen_keys() Setting ATTR_NVDIMM_ENCRYPTION_KEYS_FW = ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR"); + set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW(l_attrKeysAn); + break; + } + + // If we get here then both key attributes are zero, generate new keys + assert(sizeof(l_attrKeysFw) == sizeof(nvdimmKeyData_t), + "nvdimm_gen_keys() ATTR_NVDIMM_ENCRYPTION_KEYS_FW size does not match nvdimmKeyData_t"); + nvdimmKeyData_t* l_keys = + reinterpret_cast<nvdimmKeyData_t*>(&l_attrKeysFw); + + // Generate Random String (RS) + l_err = nvdimm_getRandom(l_keys->rs); + if (l_err) + { + break; + } + + // Generate Erase Key (EK) + l_err = nvdimm_getRandom(l_keys->ek); + if (l_err) + { + break; + } + + // Generate Access Key (AK) + l_err = nvdimm_getRandom(l_keys->ak); + if (l_err) + { + break; + } + + // Set the FW attribute + set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW(l_attrKeysFw); + + }while(0); + + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_gen_keys() Failed to generate keys, will not set ATTR_NVDIMM_ENCRYPTION_KEYS_FW"); + 
errlCommit( l_err, NVDIMM_COMP_ID ); + l_success = false; + + // Set the encryption error for all nvdimms + TargetHandleList l_nvdimmTargetList; + nvdimm_getNvdimmList(l_nvdimmTargetList); + for (const auto & l_nvdimm : l_nvdimmTargetList) + { + nvdimmSetEncryptionError(l_nvdimm); + } + } + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_gen_keys()"); + return l_success; +} + + +bool nvdimm_remove_keys(void) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_remove_keys()"); + bool l_success = true; + + // Get the sys pointer, attribute keys are system level + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "nvdimm_remove_keys() no TopLevelTarget"); + + // Set the FW attribute = 0 + TRACFCOMP(g_trac_nvdimm, "nvdimm_remove_keys() Setting ATTR_NVDIMM_ENCRYPTION_KEYS_FW=0"); + ATTR_NVDIMM_ENCRYPTION_KEYS_FW_typeStdArr l_attrKeysFw = {0}; + set_ATTR_NVDIMM_ENCRYPTION_KEYS_FW(l_attrKeysFw); + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_remove_keys()"); + return l_success; +} + + +errlHndl_t nvdimm_setKeyReg(Target* i_nvdimm, + uint8_t* i_keyData, + uint32_t i_keyReg, + uint32_t i_verifyReg, + bool i_secondAttempt) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_setKeyReg(0x%X) reg=0x%X",get_huid(i_nvdimm),i_keyReg); + errlHndl_t l_err = nullptr; + + do + { + uint32_t l_byte = 0; + uint8_t l_verifyData = 0x0; + + // Before setting the key reg we need to + // init the verif reg with a random value + uint8_t l_genData[ENC_KEY_SIZE] = {0}; + l_err = nvdimm_getRandom(l_genData); + if (l_err) + { + break; + } + + // Write the verif reg one byte at a time + for (l_byte = 0; l_byte < ENC_KEY_SIZE; l_byte++) + { + // Write the verification byte + l_err = nvdimmWriteReg(i_nvdimm, i_verifyReg, l_genData[l_byte]); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_setKeyReg() huid=0x%X, error writing verif reg=0x%.03X byte=0x%d", get_huid(i_nvdimm), i_verifyReg, l_byte); + break; + } + } + + // Delay to allow verif write to complete + 
nanosleep(0, KEY_WRITE_DELAY_MS*NS_PER_MSEC); + + // Write the reg, one byte at a time + for (l_byte = 0; l_byte < ENC_KEY_SIZE; l_byte++) + { + // Write the key byte + l_err = nvdimmWriteReg(i_nvdimm, i_keyReg, i_keyData[l_byte]); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_setKeyReg() huid=0x%X, error writing key reg 0x%.03X byte=0x%d", get_huid(i_nvdimm), i_keyReg, l_byte); + break; + } + + // Read the verification byte + l_err = nvdimmReadReg(i_nvdimm, i_verifyReg, l_verifyData); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_setKeyReg() huid=0x%X, error reading verif reg=0x%.03X byte=0x%d", get_huid(i_nvdimm), i_verifyReg, l_byte); + break; + } + + // Verify the key byte + if (l_verifyData != i_keyData[l_byte]) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_setKeyReg() huid=0x%X, key verification failed reg=0x%.03X byte=0x%d set=0x%.02x get=0x%.02x", get_huid(i_nvdimm), i_keyReg, l_byte, i_keyData[l_byte], l_verifyData); + // Write KEY_ABORT_BYTE to abort the key write sequence + l_err = nvdimmWriteReg(i_nvdimm, i_keyReg, KEY_ABORT_BYTE); + if (i_secondAttempt) + { + // Verify check byte failed for the second time + TRACFCOMP(g_trac_nvdimm,ERR_MRK"nvdimm_getTPM() Key verification byte check failed on second attempt."); + /*@ + *@errortype + *@reasoncode NVDIMM_VERIF_BYTE_CHECK_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_SET_KEY_REG + *@userdata1 NVDIMM HUID + *@userdata2[0:31] Key Register + *@userdata2[32:63] Verif Register + *@devdesc NVDIMM failed to set encryption register + *@custdesc NVDIMM register error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_SET_KEY_REG, + NVDIMM_VERIF_BYTE_CHECK_FAILED, + get_huid(i_nvdimm), + NVDIMM_SET_USER_DATA_1(i_keyReg,i_verifyReg), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(i_nvdimm, l_err); + l_err->addPartCallout( i_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + 
HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(i_nvdimm,l_err); + } + else + { + // Try writing the reg again + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_setKeyReg() huid=0x%X, writing reg=0x%.03X again", get_huid(i_nvdimm), i_keyReg); + l_err = nvdimm_setKeyReg(i_nvdimm, + i_keyData, + i_keyReg, + i_verifyReg, + true); + } + break; + } + } + + // Delay to allow write to complete + nanosleep(0, KEY_WRITE_DELAY_MS*NS_PER_MSEC); + + }while(0); + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_setKeyReg(0x%X) reg=0x%X",get_huid(i_nvdimm),i_keyReg); + return l_err; +} + + +bool nvdimm_encrypt_enable(TargetHandleList &i_nvdimmList) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_encrypt_enable()"); + errlHndl_t l_err = nullptr; + bool l_success = true; + + do + { + // Get the sys pointer, attribute keys are system level + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "nvdimm_encrypt_enable() no TopLevelTarget"); + + // Exit if encryption is not enabled via the attribute + if (!l_sys->getAttr<ATTR_NVDIMM_ENCRYPTION_ENABLE>()) + { + TRACFCOMP(g_trac_nvdimm,"ATTR_NVDIMM_ENCRYPTION_ENABLE=0"); + break; + } + + // Get the FW key attributes + auto l_attrKeysFw = + l_sys->getAttrAsStdArr<ATTR_NVDIMM_ENCRYPTION_KEYS_FW>(); + + // Cast to key data struct type for easy access to each key + nvdimmKeyData_t* l_keysFw = + reinterpret_cast<nvdimmKeyData_t*>(&l_attrKeysFw); + + // Check for valid key attribute key data + l_err = nvdimm_checkValidAttrKeys(l_keysFw); + if (l_err) + { + break; + } + + // Handle encryption for all nvdimms + for (const auto & l_nvdimm : i_nvdimmList) + { + // Check encryption state in the config/status reg + encryption_config_status_t l_encStatus = {0}; + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_encrypt_enable() nvdimm[%X] error reading ENCRYPTION_CONFIG_STATUS",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + 
nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Encryption is enabled and unlocked + if (l_encStatus.encryption_unlocked && + l_encStatus.encryption_enabled) + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_encrypt_enable() nvdimm[%X] enabled and unlocked",get_huid(l_nvdimm)); + continue; + } + + // Need to handle these cases? + if (!((l_encStatus.whole & ENCRYPTION_STATUS_CHECK_MASK) + == ENCRYPTION_STATUS_DISABLED)) + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_encrypt_enable() nvdimm[%X] unsupported state 0x%.02X",get_huid(l_nvdimm),l_encStatus.whole); + continue; + } + + // Status = 0x01, enable encryption + // Set the Random String (RS) reg + TRACFCOMP(g_trac_nvdimm,"nvdimm_encrypt_enable() nvdimm[%X] status=0x01 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + l_err = nvdimm_setKeyReg(l_nvdimm, + l_keysFw->rs, + ENCRYPTION_RAMDOM_STRING_SET, + ENCRYPTION_RANDOM_STRING_VERIFY, + false); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Set the Erase Key (EK) Reg + l_err = nvdimm_setKeyReg(l_nvdimm, + l_keysFw->ek, + ENCRYPTION_ERASE_KEY_SET, + ENCRYPTION_ERASE_KEY_VERIFY, + false); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Set the Access Key (AK) Reg + l_err = nvdimm_setKeyReg(l_nvdimm, + l_keysFw->ak, + ENCRYPTION_ACCESS_KEY_SET, + ENCRYPTION_ACCESS_KEY_VERIFY, + false); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Verify encryption is enabled + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_encrypt_enable() nvdimm[%X] error reading ENCRYPTION_CONFIG_STATUS after enable",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success 
= false; + continue; + } + if (!((l_encStatus.whole & ENCRYPTION_STATUS_CHECK_MASK) + == ENCRYPTION_STATUS_ENABLED)) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_encrypt_enable() nvdimm[%X] encryption enable failed, ENCRYPTION_CONFIG_STATUS=0x%.02X, expected=0x1F ",get_huid(l_nvdimm),l_encStatus.whole); + /*@ + *@errortype + *@reasoncode NVDIMM_ENCRYPTION_ENABLE_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_ENCRYPT_ENABLE + *@userdata1 NVDIMM HUID + *@userdata2 ENCRYPTION_CONFIG_STATUS + *@devdesc NVDIMM failed to enable encryption + *@custdesc NVDIMM encryption error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_ENCRYPT_ENABLE, + NVDIMM_ENCRYPTION_ENABLE_FAILED, + get_huid(l_nvdimm), + l_encStatus.whole, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); + l_err->addPartCallout( l_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + nvdimmAddPage4Regs(l_nvdimm,l_err); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + } + else + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_encrypt_enable() nvdimm[%X] encryption is enabled 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + + l_err = notifyNvdimmProtectionChange(l_nvdimm, + ENCRYPTION_ENABLED); + if (l_err) + { + errlCommit(l_err, NVDIMM_COMP_ID); + } + } + } + }while(0); + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_encrypt_enable()"); + return l_success; +} + + +bool nvdimm_crypto_erase(TargetHandleList &i_nvdimmList) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_crypto_erase()"); + errlHndl_t l_err = nullptr; + bool l_success = true; + + do + { + // Get the sys pointer, attribute keys are system level + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "nvdimm_crypto_erase: no TopLevelTarget"); + + // Exit if encryption is not enabled via the attribute + if 
(!l_sys->getAttr<ATTR_NVDIMM_ENCRYPTION_ENABLE>()) + { + TRACFCOMP(g_trac_nvdimm,"ATTR_NVDIMM_ENCRYPTION_ENABLE=0"); + break; + } + + // Get the FW key attributes + auto l_attrKeysFw = + l_sys->getAttrAsStdArr<ATTR_NVDIMM_ENCRYPTION_KEYS_FW>(); + + // Cast to key data struct type for easy access to each key + nvdimmKeyData_t* l_keysFw = + reinterpret_cast<nvdimmKeyData_t*>(&l_attrKeysFw); + + // Check for valid key attribute key data + l_err = nvdimm_checkValidAttrKeys(l_keysFw); + if (l_err) + { + break; + } + + // Handle erase for all nvdimms + for (const auto & l_nvdimm : i_nvdimmList) + { + // Check encryption state in the config/status reg + encryption_config_status_t l_encStatus = {0}; + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_crypto_erase() nvdimm[%X] error reading ENCRYPTION_CONFIG_STATUS",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + // Encryption enabled must be set to crypto erase + if (!l_encStatus.encryption_enabled) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_crypto_erase() nvdimm[%X] encryption not enabled, will not cypto erase 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + l_success = false; + continue; + } + else + { + TRACFCOMP(g_trac_nvdimm, "nvdimm_crypto_erase() nvdimm[%X] encryption enabled 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + } + + // Set the Erase Key (EK) Reg + l_err = nvdimm_setKeyReg(l_nvdimm, + l_keysFw->ek, + ENCRYPTION_ERASE_KEY_SET, + ENCRYPTION_ERASE_KEY_VERIFY, + false); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Check encryption state in the config/status reg + l_err = nvdimmReadReg(l_nvdimm, + ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_crypto_erase() nvdimm[%X] error 
reading ENCRYPTION_CONFIG_STATUS",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + // Erase pending bit should be set + if (!l_encStatus.erase_pending) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_crypto_erase() nvdimm[%X] expected erase pending = 1 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + /*@ + *@errortype + *@reasoncode NVDIMM_ENCRYPTION_ERASE_PENDING_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CRYPTO_ERASE + *@userdata1 NVDIMM HUID + *@userdata2 ENCRYPTION_CONFIG_STATUS + *@devdesc NVDIMM failed to set encryption register + *@custdesc NVDIMM register error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CRYPTO_ERASE, + NVDIMM_ENCRYPTION_ERASE_PENDING_FAILED, + get_huid(l_nvdimm), + l_encStatus.whole, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); + l_err->addPartCallout( l_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + nvdimmAddPage4Regs(l_nvdimm,l_err); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + else + { + TRACFCOMP(g_trac_nvdimm,"nvdimm_crypto_erase() nvdimm[%X] erase pending 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + } + + // Generate a generic erase key + uint8_t l_genData[ENC_KEY_SIZE] = {0}; + l_err = nvdimm_getRandom(l_genData); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Set the Erase Key (EK) Reg + l_err = nvdimm_setKeyReg(l_nvdimm, + l_genData, + ENCRYPTION_ERASE_KEY_SET, + ENCRYPTION_ERASE_KEY_VERIFY, + false); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + + // Check encryption state in the config/status reg + l_err = nvdimmReadReg(l_nvdimm, + 
ENCRYPTION_CONFIG_STATUS, + l_encStatus.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_crypto_erase() nvdimm[%X] error reading ENCRYPTION_CONFIG_STATUS",get_huid(l_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + // Encryption enabled bit should not be set + if (l_encStatus.encryption_enabled) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_crypto_erase() nvdimm[%X] expected encryption enabled = 0 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + /*@ + *@errortype + *@reasoncode NVDIMM_ENCRYPTION_ERASE_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CRYPTO_ERASE + *@userdata1 NVDIMM HUID + *@userdata2 ENCRYPTION_CONFIG_STATUS + *@devdesc NVDIMM failed to set encryption register + *@custdesc NVDIMM register error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CRYPTO_ERASE, + NVDIMM_ENCRYPTION_ERASE_FAILED, + get_huid(l_nvdimm), + l_encStatus.whole, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); + l_err->addPartCallout( l_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + nvdimmAddPage4Regs(l_nvdimm,l_err); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmSetEncryptionError(l_nvdimm); + l_success = false; + continue; + } + else + { + TRACFCOMP(g_trac_nvdimm,"nvdimm_crypto_erase() nvdimm[%X] erase complete 0x%.02x",get_huid(l_nvdimm),l_encStatus.whole); + + l_err = notifyNvdimmProtectionChange(l_nvdimm, + ENCRYPTION_DISABLED); + if (l_err) + { + errlCommit(l_err, NVDIMM_COMP_ID); + } + } + } + }while(0); + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_crypto_erase()"); + return l_success; +} + + +errlHndl_t notifyNvdimmProtectionChange(Target* i_target, + const nvdimm_protection_t i_state) +{ + TRACFCOMP( g_trac_nvdimm, ENTER_MRK + "notifyNvdimmProtectionChange: Target huid 0x%.8X, state %d", + get_huid(i_target), i_state); + + 
errlHndl_t l_err = nullptr; + + do + { + // Get the type of target passed in + // It could be proc_type for OCC state + // Or dimm_type for ARM/ERROR state + ATTR_TYPE_type l_type = i_target->getAttr<ATTR_TYPE>(); + assert((l_type == TYPE_PROC)||(l_type == TYPE_DIMM), + "notifyNvdimmProtectionChange invalid target type"); + + // Load the nvdimm list + TargetHandleList l_nvdimmTargetList; + Target* l_proc = nullptr; + if (l_type == TYPE_PROC) + { + // Get the nvdimms under this proc target + l_nvdimmTargetList = getProcNVDIMMs(i_target); + + // Only send command if the processor has an NVDIMM under it + if (l_nvdimmTargetList.empty()) + { + TRACFCOMP( g_trac_nvdimm, "notifyNvdimmProtectionChange: " + "No NVDIMM found under processor 0x%.8X", + get_huid(i_target)); + break; + } + + // The proc target is the passed-in target + l_proc = i_target; + } + else + { + // Only a list of one but keep consistent with proc type + l_nvdimmTargetList.push_back(i_target); + + // Find the proc target from nvdimm target passed in + TargetHandleList l_procList; + getParentAffinityTargets(l_procList, + i_target, + CLASS_CHIP, + TYPE_PROC, + UTIL_FILTER_ALL); + assert(l_procList.size() == 1, "notifyNvdimmProtectionChange:" + "getParentAffinityTargets size != 1"); + l_proc = l_procList[0]; + } + + + // Update the nvdimm status attributes + for (auto const l_nvdimm : l_nvdimmTargetList) + { + // Get the armed status attr and update it + ATTR_NVDIMM_ARMED_type l_armed_state = {}; + l_armed_state = l_nvdimm->getAttr<ATTR_NVDIMM_ARMED>(); + + // If we change the armed state, need to tell FSP + bool l_armed_change = false; + bool l_set_encryption = false; + bool l_clr_encryption = false; + bool l_sev_started = false; + bool l_sev_completed = false; + + switch (i_state) + { + case NVDIMM_ARMED: + l_armed_state.armed = 1; + l_armed_change = true; + break; + case NVDIMM_DISARMED: + l_armed_state.armed = 0; + l_armed_change = true; + break; + case OCC_ACTIVE: + l_armed_state.occ_active = 1; + 
break; + case OCC_INACTIVE: + l_armed_state.occ_active = 0; + break; + case NVDIMM_FATAL_HW_ERROR: + l_armed_state.fatal_error_detected = 1; + break; + case NVDIMM_RISKY_HW_ERROR: + l_armed_state.risky_error_detected = 1; + break; + case NVDIMM_ENCRYPTION_ERROR: + l_armed_state.encryption_error_detected = 1; + break; + case ENCRYPTION_ENABLED: + l_set_encryption = true; + break; + case ENCRYPTION_DISABLED: + l_clr_encryption = true; + break; + case ERASE_VERIFY_STARTED: + l_sev_started = true; + break; + case ERASE_VERIFY_COMPLETED: + l_sev_completed = true; + break; + case SEND_NV_STATUS: + // no action, just send status + break; + } + + // Set the attribute and send it to the FSP if needed + l_nvdimm->setAttr<ATTR_NVDIMM_ARMED>(l_armed_state); + if( l_armed_change ) + { + send_ATTR_NVDIMM_ARMED( l_nvdimm, l_armed_state ); + } + + // Get the nv status flag attr and update it + ATTR_NV_STATUS_FLAG_type l_nv_status = + l_nvdimm->getAttr<ATTR_NV_STATUS_FLAG>(); + + // Clear bit 0 if protected nv state + if (l_armed_state.armed && + l_armed_state.occ_active && + !l_armed_state.fatal_error_detected) + { + l_nv_status &= NV_STATUS_UNPROTECTED_CLR; + } + + // Set bit 0 if unprotected nv state + else + { + l_nv_status |= NV_STATUS_UNPROTECTED_SET; + } + + // Set bit 4 if encryption enabled + if (l_set_encryption) + { + l_nv_status |= NV_STATUS_ENCRYPTION_SET; + } + + // Clear bit 4 if encryption disabled + if (l_clr_encryption) + { + l_nv_status &= NV_STATUS_ENCRYPTION_CLR; + } + + // Clear bit 5 if secure erase verify started + if (l_sev_started) + { + l_nv_status &= NV_STATUS_ERASE_VERIFY_CLR; + } + + // Set bit 5 if secure erase verify comlpleted + if (l_sev_completed) + { + l_nv_status |= NV_STATUS_ERASE_VERIFY_SET; + } + + // Set bit 6 if risky error + if (l_armed_state.risky_error_detected) + { + l_nv_status |= NV_STATUS_POSSIBLY_UNPROTECTED_SET; + } + + l_nvdimm->setAttr<ATTR_NV_STATUS_FLAG>(l_nv_status); + + } // for nvdimm list + + // Generate combined nvdimm 
status for the proc + // Bit 2 of NV_STATUS_FLAG is 'Device contents are persisted' + // and must be ANDed for all nvdimms + // the rest of the bits are ORed for all nvdimms + ATTR_NV_STATUS_FLAG_type l_combined_or = 0x00; + ATTR_NV_STATUS_FLAG_type l_combined_and = 0xFF; + ATTR_NV_STATUS_FLAG_type l_combined_status = 0x00; + l_nvdimmTargetList = getProcNVDIMMs(l_proc); + for (auto const l_nvdimm : l_nvdimmTargetList) + { + l_combined_or |= l_nvdimm->getAttr<ATTR_NV_STATUS_FLAG>(); + l_combined_and &= l_nvdimm->getAttr<ATTR_NV_STATUS_FLAG>(); + } + + // Bit 2 of NV_STATUS_FLAG is 'Device contents are persisted' + l_combined_status = + (l_combined_or & NV_STATUS_OR_MASK) | + (l_combined_and & NV_STATUS_AND_MASK); + + TRACFCOMP( g_trac_nvdimm, + "notifyNvdimmProtectionChange: NV_STATUS for proc %X 0x%.02X", + get_huid(l_proc), l_combined_status); + +#ifdef __HOSTBOOT_RUNTIME + + // Send combined status notification + // Get the Proc Chip Id + TARGETING::rtChipId_t l_chipId = 0; + + l_err = TARGETING::getRtTarget(l_proc, l_chipId); + if(l_err) + { + TRACFCOMP( g_trac_nvdimm, + ERR_MRK"notifyNvdimmProtectionChange: getRtTarget ERROR" ); + break; + } + + // Check for valid interface + if ((nullptr == g_hostInterfaces) || + (nullptr == g_hostInterfaces->firmware_request)) + { + TRACFCOMP( g_trac_nvdimm, ERR_MRK"notifyNvdimmProtectionChange: " + "Hypervisor firmware_request interface not linked"); + + /*@ + * @errortype + * @severity ERRL_SEV_PREDICTIVE + * @moduleid NOTIFY_NVDIMM_PROTECTION_CHG + * @reasoncode NVDIMM_NULL_FIRMWARE_REQUEST_PTR + * @userdata1 HUID of processor target + * @userdata2[0:31] NV_STATUS to PHYP + * @userdata2[32:63] In state change + * @devdesc Unable to inform PHYP of NVDIMM protection + * @custdesc Internal firmware error + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NOTIFY_NVDIMM_PROTECTION_CHG, + NVDIMM_NULL_FIRMWARE_REQUEST_PTR, + get_huid(l_proc), + TWO_UINT32_TO_UINT64( + l_combined_status, + i_state) + ); + + 
l_err->addProcedureCallout(HWAS::EPUB_PRC_PHYP_CODE, + HWAS::SRCI_PRIORITY_HIGH); + + break; + } + + TRACFCOMP( g_trac_nvdimm, + "notifyNvdimmProtectionChange: 0x%.8X " + "NV_STATUS to HYP: 0x%02X", + get_huid(l_proc), + l_combined_status ); + + // Create the firmware_request request struct to send data + hostInterfaces::hbrt_fw_msg l_req_fw_msg; + memset(&l_req_fw_msg, 0, sizeof(l_req_fw_msg)); // clear it all + + // actual msg size (one type of hbrt_fw_msg) + uint64_t l_req_fw_msg_size = hostInterfaces::HBRT_FW_MSG_BASE_SIZE + + sizeof(l_req_fw_msg.nvdimm_protection_state); + + // Populate the firmware_request request struct with given data + l_req_fw_msg.io_type = + hostInterfaces::HBRT_FW_MSG_TYPE_NVDIMM_PROTECTION; + l_req_fw_msg.nvdimm_protection_state.i_procId = l_chipId; + l_req_fw_msg.nvdimm_protection_state.i_state = l_combined_status; + + // Create the firmware_request response struct to receive data + hostInterfaces::hbrt_fw_msg l_resp_fw_msg; + uint64_t l_resp_fw_msg_size = sizeof(l_resp_fw_msg); + memset(&l_resp_fw_msg, 0, l_resp_fw_msg_size); + + // Make the firmware_request call + l_err = firmware_request_helper(l_req_fw_msg_size, + &l_req_fw_msg, + &l_resp_fw_msg_size, + &l_resp_fw_msg); +#endif + + } while (0); + + TRACFCOMP( g_trac_nvdimm, + EXIT_MRK "notifyNvdimmProtectionChange(%.8X, %d) - ERRL %.8X:%.4X", + get_huid(i_target), i_state, + ERRL_GETEID_SAFE(l_err), ERRL_GETRC_SAFE(l_err) ); + + return l_err; +} + + +/* + * @brief Get operational unit operation timeout + */ +errlHndl_t getOperOpsTimeout(TARGETING::Target* i_nvdimm, + uint16_t& o_timeout) +{ + errlHndl_t l_err = nullptr; + + do + { + // Get timeout lsb + uint8_t l_lsb = 0; + l_err = nvdimmReadReg(i_nvdimm, + OPERATIONAL_UNIT_OPS_TIMEOUT0, + l_lsb); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getOperOpsTimeout() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_OPS_TIMEOUT0); + break; + } + + // Get timeout msb + uint8_t l_msb = 0; + l_err = 
nvdimmReadReg(i_nvdimm, + OPERATIONAL_UNIT_OPS_TIMEOUT1, + l_msb); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getOperOpsTimeout() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_OPS_TIMEOUT1); + break; + } + + // Bit 7 of the MSB indicates whether the time should + // be interpreted in seconds or milliseconds + // 0 = millisecond + // 1 = second + if (l_msb < MSBIT_SET_MASK) + { + o_timeout = l_msb; + o_timeout <<= 8; + o_timeout += l_lsb; + o_timeout = o_timeout / MS_PER_SEC; + } + else + { + l_msb = l_msb & MSBIT_CLR_MASK; + o_timeout = l_msb; + o_timeout <<= 8; + o_timeout += l_lsb; + } + + } while(0); + + return l_err; +} + + +/* + * @brief Wait for operational unit operation to complete + */ +errlHndl_t waitOperOpsComplete(TARGETING::Target* i_nvdimm, uint8_t i_cmd) +{ + errlHndl_t l_err = nullptr; + bool l_complete = false; + uint16_t l_timeout = 0; + uint8_t l_status = 0; + + // Get the timeout + l_err = getOperOpsTimeout(i_nvdimm, l_timeout); + + do + { + // Exit if l_timeout invalid + if (l_err) + { + break; + } + + // Delay before reading status + nanosleep( OPERATION_SLEEP_SECONDS, 0 ); + if (OPERATION_SLEEP_SECONDS > l_timeout) + { + l_timeout = 0; + } + else + { + l_timeout = l_timeout - OPERATION_SLEEP_SECONDS; + } + + // Get timeout cmd status 1 + l_err = nvdimmReadReg(i_nvdimm, + NVDIMM_CMD_STATUS1, + l_status); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "waitOperOpsComplete() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), NVDIMM_CMD_STATUS1); + break; + } + + if (l_status >= 0x01) + { + // If bit 1 is set that means the command is in progress + // Wait for it to become 0 + } + else + { + l_complete = true; + break; + } + + } while(l_timeout > 0); + + // Timed out + if (!l_err && (l_complete == false) ) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "waitOperOpsComplete() nvdimm[%X] " + "Timeout waiting for operation 0x%X to complete, " + "NVDIMM_CMD_STATUS1 0x%X", + get_huid(i_nvdimm), i_cmd, 
l_status); + + // Get the timeout value again + getOperOpsTimeout(i_nvdimm, l_timeout); + + /*@ + *@errortype + *@reasoncode NVDIMM_VENDOR_LOG_TIMEOUT + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_WAIT_OPER_OPS_COMPLETE + *@userdata1[0:31] NVDIMM HUID + *@userdata1[32:63] OPERATIONAL_UNIT_OPS_CMD + *@userdata2[0:31] NVDIMM_CMD_STATUS1 + *@userdata2[32:63] OPERATIONAL_UNIT_OPS_TIMEOUT + *@devdesc NVDIMM timeout reading vendor log + *@custdesc NVDIMM logging error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_WAIT_OPER_OPS_COMPLETE, + NVDIMM_VENDOR_LOG_TIMEOUT, + TWO_UINT32_TO_UINT64( + get_huid(i_nvdimm), + i_cmd + ), + TWO_UINT32_TO_UINT64( + l_status, + l_timeout + ), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + l_err->addPartCallout( i_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + } + + return l_err; +} + + +/* + * @brief Get the vendor log unit + */ +errlHndl_t getLogPerUnit(TARGETING::Target* i_nvdimm, + uint16_t i_unitId, + std::vector<uint8_t>& o_unitData) +{ + // 3a) write OPERATIONAL_UNIT_ID0 and OPERATIONAL_UNIT_ID1 with unit_id + // 3b) set OPERATIONAL_UNIT_OPS_CMD to GET_OPERATIONAL_UNIT + // 3c) wait for NVDIMM_CMD_STATUS1 to return 0 + // 3d) for (block_id = 0; + // block_id < VENDOR_LOG_UNIT_SIZE/BLOCKSIZE; + // block_id++) + // 3da) Write block_id to BLOCK_ID + // 3db) Read TYPED_BLOCK_DATA_BYTE0 to TYPED_BLOCK_DATA_BYTE31 + // 3dc) Save data to buffer + + errlHndl_t l_err = nullptr; + + do + { + // 3a) + // Write the unit LSB + l_err = nvdimmWriteReg(i_nvdimm, + OPERATIONAL_UNIT_ID0, + i_unitId & 0x00FF); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getLogPerUnit() nvdimm[%X] error writing reg 0x%X to 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_ID0, (i_unitId & 0x00FF)); + break; + } + + // Write the unit MSB + l_err = nvdimmWriteReg(i_nvdimm, + OPERATIONAL_UNIT_ID1, + i_unitId >> 8); + if (l_err) + { + 
TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getLogPerUnit() nvdimm[%X] error writing reg 0x%X to 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_ID0, (i_unitId >> 8) ); + break; + } + + // 3b) + // Write the cmd + l_err = nvdimmWriteReg(i_nvdimm, + OPERATIONAL_UNIT_OPS_CMD, + GET_OPERATIONAL_UNIT); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getLogPerUnit() nvdimm[%X] error writing reg 0x%X to 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_OPS_CMD, + GET_OPERATIONAL_UNIT ); + break; + } + + // 3c + l_err = waitOperOpsComplete(i_nvdimm, GET_OPERATIONAL_UNIT); + if (l_err) + { + break; + } + + // 3d + for (uint8_t l_blockId = 0; + l_blockId < (VENDOR_LOG_UNIT_SIZE / VENDOR_LOG_BLOCK_SIZE); + l_blockId++) + { + // 3da + // Write the block id + l_err = nvdimmWriteReg(i_nvdimm, + BLOCK_ID, + l_blockId); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getLogPerUnit() nvdimm[%X] error writing reg 0x%X to 0x%X", + get_huid(i_nvdimm), BLOCK_ID, l_blockId ); + break; + } + + // 3db + // Read all the block data + for (uint16_t l_byteId = TYPED_BLOCK_DATA_BYTE0; + l_byteId < (TYPED_BLOCK_DATA_BYTE0 + VENDOR_BLOCK_DATA_BYTES); + l_byteId++) + { + uint8_t l_data = 0; + l_err = nvdimmReadReg(i_nvdimm, + l_byteId, + l_data); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getLogPerUnit() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), l_byteId); + break; + } + + // 3dc + o_unitData.push_back(l_data); + } // for byteId + + if (l_err) + { + break; + } + } // for blockId + + } while(0); + + return l_err; +} + + +/* + * @brief Calculate CRC + */ +uint16_t crc16(const uint8_t * i_data, int i_size) +{ + // From JEDEC JESD245B.01 document + // https://www.jedec.org/standards-documents/docs/jesd245a + int i, crc; + crc = 0; + while (--i_size >= 0) + { + crc = crc ^ (int)*i_data++ << 8; + for (i = 0; i < 8; ++i) + { + if (crc & 0x8000) + { + crc = crc << 1 ^ 0x1021; + } + else + { + crc = crc << 1; + } + } + } + return (crc & 0xFFFF); +} + + +/* + * @brief Get 
operational unit crc + */ +errlHndl_t getOperUnitCrc(TARGETING::Target* i_nvdimm, uint16_t& o_crc) +{ + errlHndl_t l_err = nullptr; + + do + { + // Get crc lsb + uint8_t l_lsb = 0; + l_err = nvdimmReadReg(i_nvdimm, + OPERATIONAL_UNIT_CRC0, + l_lsb); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getOperUnitCrc() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_CRC0); + break; + } + + // Get crc msb + uint8_t l_msb = 0; + l_err = nvdimmReadReg(i_nvdimm, + OPERATIONAL_UNIT_CRC1, + l_msb); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "getOperUnitCrc() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_CRC1); + break; + } + + o_crc = l_msb; + o_crc <<= 8; + o_crc += l_lsb; + + } while(0); + + return l_err; +} + + +/* + * @brief Compare host and nvdimm checksum + */ +errlHndl_t compareCksum(TARGETING::Target* i_nvdimm, + std::vector<uint8_t>& i_unitData) +{ + // 3e) Compare checksum for unit retrieved + // 3ea) Write GENERATE_OPERATIONAL_UNIT_CKSUM + // to OPERATIONAL_UNIT_OPS_CMD + // 3eb) wait for NVDIMM_CMD_STATUS1 to return 0 + // 3ec) Read OPERATIONAL_UNIT_CRC1(MSB) and OPERATIONAL_UNIT_CRC0(LSB) + // 3ed) Calculate host checksum + // 3ee) return true if 3ec) == 3ed) + + errlHndl_t l_err = nullptr; + + do + { + // 3ea) + // Command the nvdimm to calculate the CRC on the unit + l_err = nvdimmWriteReg(i_nvdimm, + OPERATIONAL_UNIT_OPS_CMD, + GENERATE_OPERATIONAL_UNIT_CKSUM); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "compareCksum() nvdimm[%X] error writing reg 0x%X to 0x%X", + get_huid(i_nvdimm), OPERATIONAL_UNIT_OPS_CMD, + GENERATE_OPERATIONAL_UNIT_CKSUM ); + break; + } + + // 3eb) + // Wait for the command to finish + l_err = waitOperOpsComplete(i_nvdimm, + GENERATE_OPERATIONAL_UNIT_CKSUM); + if (l_err) + { + break; + } + + // 3ec) + // Read the HW CRC MSB + LSB + uint16_t l_nvdimmCrc = 0; + l_err = getOperUnitCrc(i_nvdimm, l_nvdimmCrc); + if (l_err) + { + break; + } + + // 3ed) + // Calculate 
the host checksum + uint8_t* l_hostData = reinterpret_cast<uint8_t*>(i_unitData.data()); + uint16_t l_hostCrc = crc16(l_hostData, i_unitData.size()); + + // 3ee) + if (l_hostCrc != l_nvdimmCrc) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "compareCksum() nvdimm[%X] compare cksum failed " + "hostCrc 0x%X nvdimmCrc 0x%X", + get_huid(i_nvdimm), l_hostCrc, l_nvdimmCrc); + /*@ + *@errortype + *@reasoncode NVDIMM_VENDOR_LOG_CKSUM_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_COMPARE_CKSUM + *@userdata1 NVDIMM HUID + *@userdata2[0:31] HOST CRC + *@userdata2[32:63] NVDIMM CRC + *@devdesc NVDIMM vendor log checksum failed + *@custdesc NVDIMM logging error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_COMPARE_CKSUM, + NVDIMM_VENDOR_LOG_CKSUM_FAILED, + get_huid(i_nvdimm), + TWO_UINT32_TO_UINT64( + l_hostCrc, + l_nvdimmCrc), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + l_err->addPartCallout( i_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + } + + } while(0); + + return l_err; +} + + +/* + * @brief Add vendor log data to FFDC + * Added to all NVDIMM HW errors + */ +void nvdimmAddVendorLog( TARGETING::Target* i_nvdimm, errlHndl_t& io_err ) +{ + TRACFCOMP( g_trac_nvdimm, ENTER_MRK + "nvdimmAddVendorLog: Target huid 0x%.8X", + get_huid(i_nvdimm)); + + /* + 1) Read VENDOR_LOG_PAGE_SIZE. 
Multiply the return value with BLOCKSIZE + to get the total page size (LOG_PAGE_SIZE) + 2) Set TYPED_BLOCK_DATA to VENDOR_DATA_TYPE + 3) for (unit_id = 0; + unit_id < LOG_PAGE_LENGTH/VENDOR_LOG_UNIT_SIZE; + unit_id++) + 3a) write OPERATIONAL_UNIT_ID0 and OPERATIONAL_UNIT_ID1 with unit_id + 3b) set OPERATIONAL_UNIT_OPS_CMD to GET_OPERATIONAL_UNIT + 3c) wait for NVDIMM_CMD_STATUS1 to return 0 + 3d) for (block_id = 0; + block_id < VENDOR_LOG_UNIT_SIZE/BLOCKSIZE; + block_id++) + 3da) Write block_id to BLOCK_ID + 3db) Read TYPED_BLOCK_DATA_BYTE0 to TYPED_BLOCK_DATA_BYTE31 + 3dc) Save data to buffer + 3e) Compare checksum for unit retrieved + 3ea) Write GENERATE_OPERATIONAL_UNIT_CKSUM + to OPERATIONAL_UNIT_OPS_CMD + 3eb) wait for NVDIMM_CMD_STATUS1 to return 0 + 3ec) Read OPERATIONAL_UNIT_CRC1(MSB) and OPERATIONAL_UNIT_CRC0(LSB) + 3ed) Calculate host checksum + 3ee) return true if 3ec) == 3ed) + */ + + errlHndl_t l_err = nullptr; + + // Get the vendor log attribute + auto l_vendorLog = i_nvdimm->getAttr<ATTR_NVDIMM_READING_VENDOR_LOG>(); + + do + { + // If attr is set we are already in the process of + // reading the vendor log, exit + if (l_vendorLog) + { + break; + } + + if (io_err == nullptr) + { + // A nullptr was given when it should not have been. Emit a trace + // and break out of this function. + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddVendorLog() io_err was nullptr!! 
Skip adding additional FFDC."); + break; + } + + + // Set the vendor log attribute so we don't recursively + // execute the nvdimmAddVendorLog function + l_vendorLog = 0x1; + i_nvdimm->setAttr<ATTR_NVDIMM_READING_VENDOR_LOG>(l_vendorLog); + + uint8_t l_readData = 0; + std::vector<uint8_t> l_fullData; + + // Step 1 + l_err = nvdimmReadReg(i_nvdimm, + VENDOR_LOG_PAGE_SIZE, + l_readData); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddVendorLog() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), VENDOR_LOG_PAGE_SIZE); + break; + } + + size_t l_logPgeLength = l_readData * VENDOR_LOG_BLOCK_SIZE; + + // Step 2 + // Some weird bug here - switching directly to VENDOR_DATA_TYPE + // would not work. Need to switch to something else first + l_err = nvdimmWriteReg(i_nvdimm, + TYPED_BLOCK_DATA, + FIRMWARE_IMAGE_DATA); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddVendorLog() nvdimm[%X] error writing 0x%X to 0x%X", + get_huid(i_nvdimm),TYPED_BLOCK_DATA, FIRMWARE_IMAGE_DATA ); + break; + } + + l_err = nvdimmWriteReg(i_nvdimm, + TYPED_BLOCK_DATA, + VENDOR_DATA_TYPE); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddVendorLog() nvdimm[%X] error writing 0x%X to 0x%X", + get_huid(i_nvdimm),TYPED_BLOCK_DATA, VENDOR_DATA_TYPE ); + break; + } + + // Step 3 + // Loop through all the log units. 
+ for (uint16_t l_unitId = 0; + l_unitId < (l_logPgeLength / VENDOR_LOG_UNIT_SIZE); + l_unitId++) + { + // Step 3a) - 3dc) + // Get one log unit + std::vector<uint8_t> l_unitData; + l_err = getLogPerUnit(i_nvdimm, l_unitId, l_unitData); + if (l_err) + { + break; + } + + // Step 3e) - 3ee) + // Check the checksum for the entire log unit + l_err = compareCksum(i_nvdimm, l_unitData); + if (l_err) + { + break; + } + + // Append to full data + l_fullData.insert(l_fullData.end(), + l_unitData.begin(), + l_unitData.end()); + } + + if (l_err) + { + break; + } + + // Find first NUL char in the vendor log data + bool l_foundNull = false; + uint32_t l_idx = 0; + for (l_idx = 0; l_idx < l_fullData.size(); l_idx++) + { + if (l_fullData[l_idx] == 0x00) + { + l_foundNull = true; + break; + } + } + + // If NULL char not found + // then this is the old log format + if (l_foundNull == false) + { + // Add NUL terminator to ascii data + l_fullData.push_back(0x00); + } + // Else new log format + else + { + // If the next char is not NULL + // then the log has wrapped + // Re-arrange the data in chronological order + if (l_fullData[l_idx + 1] != 0x00) + { + // Save the data after the NULL char + // This is the start of the log + std::vector<uint8_t> l_tmpData; + l_tmpData.insert(l_tmpData.begin(), + l_fullData.begin() + l_idx + 1, + l_fullData.end()); + + // Erase this data from the vector + l_fullData.erase(l_fullData.begin() + l_idx + 1, + l_fullData.end()); + + // Place the saved data at the front + l_fullData.insert(l_fullData.begin(), + l_tmpData.begin(), + l_tmpData.end()); + } + // Else log has not wrapped + else + { + // Erase the data at the end of the vector + l_fullData.erase(l_fullData.begin() + l_idx + 1, + l_fullData.end()); + } + } + + // Add vendor data to error log as string + const char* l_fullChar = reinterpret_cast<char*>(l_fullData.data()); + ERRORLOG::ErrlUserDetailsStringSet l_stringSet; + l_stringSet.add("Vendor Log", l_fullChar); + l_stringSet.addToLog(io_err); 
+ + // Change back to default + l_err = nvdimmWriteReg(i_nvdimm, + TYPED_BLOCK_DATA, + VENDOR_DEFAULT); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddVendorLog() nvdimm[%X] error writing 0x%X to 0x%X", + get_huid(i_nvdimm),TYPED_BLOCK_DATA, VENDOR_DEFAULT ); + break; + } + + } while(0); + + if (l_err) + { + // FFDC error, set as informational + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // Clear the vendor log attribute before exiting + l_vendorLog = 0x0; + i_nvdimm->setAttr<ATTR_NVDIMM_READING_VENDOR_LOG>(l_vendorLog); + + TRACFCOMP( g_trac_nvdimm, EXIT_MRK + "nvdimmAddVendorLog: Target huid 0x%.8X", + get_huid(i_nvdimm)); +} + + +/* + * @brief Add NVDIMM Update regs to FFDC for errors encountered + * during NVDIMM update process + */ +void nvdimmAddUpdateRegs( TARGETING::Target* i_nvdimm, errlHndl_t& io_err ) +{ + errlHndl_t l_err = nullptr; + + do { + + if (io_err == nullptr) + { + // A nullptr was given when it should not have been. Emit a trace + // and break out of this function. + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddUpdateRegs() io_err was nullptr!! 
Skip adding additional FFDC."); + break; + } + + ERRORLOG::ErrlUserDetailsLogRegister l_regUD(i_nvdimm); + const uint32_t l_regList[] = { + NVDIMM_READY, + FIRMWARE_OPS_STATUS, + NVDIMM_CMD_STATUS0, + FIRMWARE_OPS_TIMEOUT0, + FIRMWARE_OPS_TIMEOUT1, + FW_REGION_CRC0, + FW_REGION_CRC1, + MODULE_HEALTH, + MODULE_HEALTH_STATUS0, + MODULE_HEALTH_STATUS1, + ERROR_THRESHOLD_STATUS, + ENCRYPTION_CONFIG_STATUS, + FW_SLOT_INFO, + SLOT0_ES_FWREV0, + SLOT0_ES_FWREV1, + SLOT1_ES_FWREV0, + SLOT1_ES_FWREV1, + SLOT1_SUBFWREV, + CSAVE_INFO, + CSAVE_FAIL_INFO1, + RESTORE_STATUS, + RESTORE_FAIL_INFO, + }; + uint8_t l_readData = 0; + + for (auto l_reg : l_regList) + { + l_err = nvdimmReadReg(i_nvdimm, + l_reg, + l_readData); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddUpdateRegs() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), l_reg); + + // Don't commit, just delete the error and continue + delete l_err; + l_err = nullptr; + continue; + } + + l_regUD.addDataBuffer(&l_readData, + sizeof(l_readData), + DEVICE_NVDIMM_ADDRESS(l_reg)); + } + + l_regUD.addToLog(io_err); + + } while(0); +} + + +/* + * @brief Add Page 4 regs to FFDC + * Added to all NVDIMM HW errors + */ +void nvdimmAddPage4Regs( TARGETING::Target* i_nvdimm, errlHndl_t& io_err ) +{ + errlHndl_t l_err = nullptr; + + do + { + if (io_err == nullptr) + { + // A nullptr was given when it should not have been. Emit a trace + // and break out of this function. + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddPage4Regs() io_err was nullptr!! 
Skip adding additional FFDC."); + break; + } + + + // Get the page4 attribute, if set we are already + // reading the page4 regs, exit + auto l_page4 = i_nvdimm->getAttr<ATTR_NVDIMM_READING_PAGE4>(); + if (l_page4) + { + break; + } + + // Set the page4 attribute so we don't recursively + // execute the nvdimmAddPage4Regs function + l_page4 = 0x1; + i_nvdimm->setAttr<ATTR_NVDIMM_READING_PAGE4>(l_page4); + + ERRORLOG::ErrlUserDetailsLogRegister l_regUD(i_nvdimm); + uint32_t l_regList[] = { + PANIC_CNT, + PARITY_ERROR_COUNT, + FLASH_ERROR_COUNT0, + FLASH_ERROR_COUNT1, + FLASH_ERROR_COUNT2, + FLASH_BAD_BLOCK_COUNT0, + FLASH_BAD_BLOCK_COUNT1, + SCAP_STATUS, + STATUS_EVENT_INT_INFO1, + STATUS_EVENT_INT_INFO2 + }; + uint8_t l_readData = 0; + + for (auto l_reg : l_regList) + { + l_err = nvdimmReadReg(i_nvdimm, + l_reg, + l_readData); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmAddPage4Regs() nvdimm[%X] error reading 0x%X", + get_huid(i_nvdimm), l_reg); + + // Don't commit, just delete the error and continue + delete l_err; + l_err = nullptr; + continue; + } + + l_regUD.addDataBuffer(&l_readData, + sizeof(l_readData), + DEVICE_NVDIMM_ADDRESS(l_reg)); + } + + l_regUD.addToLog(io_err); + + // Clear the page4 attribute before exiting + l_page4 = 0x0; + i_nvdimm->setAttr<ATTR_NVDIMM_READING_PAGE4>(l_page4); + + } while(0); +} + +/* + * @brief Utility function to send the value of + * ATTR_NVDIMM_ARMED to the FSP + */ +void send_ATTR_NVDIMM_ARMED( Target* i_nvdimm, + ATTR_NVDIMM_ARMED_type& i_val ) +{ +#ifdef __HOSTBOOT_RUNTIME + errlHndl_t l_err = nullptr; + + // Send attr to HWSV if at runtime + AttributeTank::Attribute l_attr = {}; + if( !makeAttribute<ATTR_NVDIMM_ARMED> + (i_nvdimm, l_attr) ) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"send_ATTR_NVDIMM_ARMED() Could not create Attribute"); + /*@ + *@errortype + *@reasoncode NVDIMM_CANNOT_MAKE_ATTRIBUTE + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid SEND_ATTR_NVDIMM_ARMED + *@devdesc Couldn't create an 
Attribute to send the data + * to the FSP + *@custdesc NVDIMM encryption error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + SEND_ATTR_NVDIMM_ARMED, + NVDIMM_CANNOT_MAKE_ATTRIBUTE, + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else + { + std::vector<TARGETING::AttributeTank::Attribute> l_attrList; + l_attrList.push_back(l_attr); + l_err = sendAttributes( l_attrList ); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"send_ATTR_NVDIMM_ARMED() Error sending ATTR_NVDIMM_ARMED down to FSP"); + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + } +#endif //__HOSTBOOT_RUNTIME +} + +/** + * @brief Grab the current slot that NVDIMM code is running + */ +errlHndl_t nvdimmGetRunningSlot(TARGETING::Target *i_nvdimm, uint8_t & o_slot) +{ + errlHndl_t l_err = nullptr; + uint8_t l_data = 0; + o_slot = 0; //default to slot 0 + + // Check if the firmware slot is 0 + l_err = nvdimmReadReg ( i_nvdimm, FW_SLOT_INFO, l_data); + if (l_err) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmGetRunningSlot() nvdimm[%X], failed to read slot info", + get_huid(i_nvdimm)); + } + else + { + // Bits 7-4 = RUNNING_FW_SLOT - slot number of running firmware + o_slot = (l_data & RUNNING_FW_SLOT) >> 4; + } + return l_err; +} + +/** + * @brief This function polls the command status register for arm completion + * + * @param[in] i_nvdimm - nvdimm target with NV controller + * + * @param[out] o_poll - total polled time in ms + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log. 
+ */ +errlHndl_t nvdimmPollArmDone(Target* i_nvdimm, + uint32_t &o_poll) +{ + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmPollArmDone() nvdimm[%X]", get_huid(i_nvdimm) ); + + errlHndl_t l_err = nullptr; + + l_err = nvdimmPollStatus ( i_nvdimm, ARM, o_poll); + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollArmDone() nvdimm[%X]", + get_huid(i_nvdimm)); + + return l_err; +} + +/** + * @brief This function checks the arm status register to make sure + * the trigger has been armed to ddr_reset_n + * + * @param[in] i_nvdimm - nvdimm target with NV controller + * @param[in] i_arm_timeout - nvdimm local timeout status + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log. + */ +errlHndl_t nvdimmCheckArmSuccess(Target *i_nvdimm, bool i_arm_timeout) +{ + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmCheckArmSuccess() nvdimm[%X]", + get_huid(i_nvdimm)); + + errlHndl_t l_err = nullptr; + uint8_t l_data = 0; + + l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data); + + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X]" + "failed to read arm status reg!",get_huid(i_nvdimm)); + } + else if (((l_data & ARM_ERROR) == ARM_ERROR) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED) || i_arm_timeout) + { + + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X]" + "failed to arm! ARM status 0x%X ARM timeout %d" + ,get_huid(i_nvdimm),l_data,i_arm_timeout); + /*@ + *@errortype + *@reasoncode NVDIMM_ARM_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_SET_ARM + *@userdata1[0:31] Related ops (0xff = NA) + *@userdata1[32:63] Target Huid + *@userdata2[0:31] ARM Status + *@userdata2[32:63] ARM Timeout + *@devdesc Encountered error arming the catastrophic save + * trigger on NVDIMM. 
Make sure an energy source + * is connected to the NVDIMM and the ES policy + * is set properly + *@custdesc NVDIMM encountered error arming save trigger + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_SET_ARM, + NVDIMM_ARM_FAILED, + TWO_UINT32_TO_UINT64(ARM, get_huid(i_nvdimm)), + TWO_UINT32_TO_UINT64(l_data, i_arm_timeout), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // Failure to arm could mean internal NV controller error or + // even error on the battery pack. NVDIMM will lose persistency + // if failed to arm trigger + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::NO_DECONFIG, + HWAS::GARD_Fatal); + } + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmCheckArmSuccess() nvdimm[%X] ret[%X]", + get_huid(i_nvdimm), l_data); + + return l_err; +} + +/** + * @brief This function performs arm precheck. + * + * @param[in] i_nvdimm - nvdimm target with NV controller + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log. 
+ */ +errlHndl_t nvdimmArmPreCheck(Target* i_nvdimm) +{ + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmArmPreCheck() nvdimm[%X]", + get_huid(i_nvdimm)); + + errlHndl_t l_err = nullptr; + uint8_t l_ready = 0; + uint8_t l_fwupdate = 0; + uint8_t l_module_health = 0; + uint8_t l_continue = true; + auto l_RegInfo = nvdimm_reg_t(); + + do + { + // Read out the Module Health status register + l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH_STATUS0, l_module_health); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArmPreCheck() nvdimm[%X] - failed to read Module Health Status", + get_huid(i_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + l_continue = false; + break; + } + + // Read out the NVDimm Ready register + l_err = nvdimmReadReg(i_nvdimm, NVDIMM_READY, l_ready); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArmPreCheck() nvdimm[%X] - failed to read NVDimm Ready register", + get_huid(i_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + l_continue = false; + break; + } + + // Read out the FW OPs Status register + l_err = nvdimmReadReg(i_nvdimm, FIRMWARE_OPS_STATUS, l_fwupdate); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArmPreCheck() nvdimm[%X] - failed to read Firmware OPs Status register", + get_huid(i_nvdimm)); + errlCommit( l_err, NVDIMM_COMP_ID ); + l_continue = false; + } + + }while(0); + + // Check ARM pre-requisites + // All nvdimms in i_nvdimmTargetList must pass the pre-req checks + // before continuing with arm. 
+ if ((!l_continue) || (l_module_health & NVM_LIFETIME_ERROR) + || (l_ready != NV_READY) + || (l_fwupdate & FW_OPS_UPDATE)) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArmPreCheck() nvdimm[%X] - failed NVDimm Arm prechecks", + get_huid(i_nvdimm)); + /*@ + *@errortype + *@reasoncode NVDIMM_ARM_PRE_CHECK_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_ARM_PRE_CHECK + *@userdata1[0:31] Target Huid + *@userdata1[32:39] l_continue + *@userdata1[40:47] l_module_health + *@userdata1[48:56] l_ready + *@userdata1[57:63] l_fwupdate + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed arm precheck. Refer to FFDC for exact reason + *@custdesc NVDIMM failed the arm precheck and is unable to arm + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_ARM_PRE_CHECK, + NVDIMM_ARM_PRE_CHECK_FAILED, + NVDIMM_SET_USER_DATA_1(TARGETING::get_huid(i_nvdimm), + FOUR_UINT8_TO_UINT32(l_continue, l_module_health, l_ready, l_fwupdate)), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Callout the dimm + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Read relevant regs for trace data + nvdimmTraceRegs(i_nvdimm, l_RegInfo); + nvdimmAddPage4Regs(i_nvdimm,l_err); + nvdimmAddVendorLog(i_nvdimm, l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + + } + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmArmPreCheck() nvdimm[%X]", + get_huid(i_nvdimm)); + + return l_err; +} + + +bool nvdimmArm(TargetHandleList &i_nvdimmTargetList) +{ + bool o_arm_successful = true; + bool l_continue = true; + bool l_arm_timeout = false; + uint8_t l_data; + auto l_RegInfo = nvdimm_reg_t(); + uint64_t l_writeData; + uint32_t l_writeAddress; + size_t l_writeSize = sizeof(l_writeData); + + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmArm() numNvdimm[%d]", + i_nvdimmTargetList.size()); + + errlHndl_t l_err = nullptr; + errlHndl_t l_err_t 
= nullptr; + + // Prerequisite Arm Checks + for (auto const l_nvdimm : i_nvdimmTargetList) + { + l_err = nvdimmArmPreCheck(l_nvdimm); + + // If we are failing the precheck, commit the error then exit + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArm() failed arm precheck, exiting"); + errlCommit(l_err, NVDIMM_COMP_ID); + return false; + } + } + + // Encryption unlocked check + // Check one nvdimm at a time + for (auto const l_nvdimm : i_nvdimmTargetList) + { + // Unlock function will create an error log + // Create another here to make it clear that the arm failed + TargetHandleList l_nvdimmTargetList; + l_nvdimmTargetList.push_back(l_nvdimm); + if (!nvdimm_encrypt_unlock(l_nvdimmTargetList)) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArm() nvdimm[%X] - failed NVDimm Arm encryption unlock", + get_huid(l_nvdimm)); + /*@ + *@errortype + *@reasoncode NVDIMM_ARM_ENCRYPTION_UNLOCK_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_ARM + *@userdata1 Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed to unlock encryption during arming + *@custdesc NVDIMM failed to ARM + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_ARM, + NVDIMM_ARM_ENCRYPTION_UNLOCK_FAILED, + get_huid(l_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Callout the dimm + l_err->addHwCallout( l_nvdimm, + HWAS::SRCI_PRIORITY_MED, + HWAS::DELAYED_DECONFIG, + HWAS::GARD_NULL); + + // Read relevant regs for trace data + nvdimmTraceRegs(l_nvdimm, l_RegInfo); + nvdimmAddPage4Regs(l_nvdimm,l_err); + nvdimmAddVendorLog(l_nvdimm, l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + + // Commit the error then exit + errlCommit(l_err, NVDIMM_COMP_ID); + return false; + } + } + + // Mask MBACALFIR EventN to separate ARM handling + for (TargetHandleList::iterator it = i_nvdimmTargetList.begin(); + it != i_nvdimmTargetList.end();) + { + 
TargetHandleList l_mcaList; + getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA); + assert(l_mcaList.size(), "nvdimmArm() failed to find parent MCA."); + + l_writeAddress = MBACALFIR_OR_MASK_REG; + l_writeData = MBACALFIR_EVENTN_OR_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + it++; + } + + for (auto const l_nvdimm : i_nvdimmTargetList) + { + l_arm_timeout = false; + + // skip if the nvdimm is already armed + ATTR_NVDIMM_ARMED_type l_armed_state = {}; + l_armed_state = l_nvdimm->getAttr<ATTR_NVDIMM_ARMED>(); + if (l_armed_state.armed) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] called when already armed", get_huid(l_nvdimm)); + continue; + } + + // Set ES Policy, contains all of its status checks + l_err = nvdimmSetESPolicy(l_nvdimm); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to set ES Policy", get_huid(l_nvdimm)); + o_arm_successful = false; + + nvdimmDisarm(i_nvdimmTargetList); + + // Committing the error as we don't want this to interrupt + // the boot. 
This will notify the user that action is needed + // on this module + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + + // Callout the nvdimm on high and gard + l_err->addHwCallout( l_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::NO_DECONFIG, + HWAS::GARD_Fatal); + + errlCommit( l_err, NVDIMM_COMP_ID ); + + break; + } + + // Clear all status registers in case of leftover bits + l_err = nvdimmWriteReg(l_nvdimm, NVDIMM_MGT_CMD0, CLEAR_ALL_STATUS); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArm() nvdimm[%X] - error clearing all status registers", + get_huid(l_nvdimm)); + o_arm_successful = false; + break; + } + + bool l_is_retryable = true; + //continue flag set by the retry loop to continue on the outer loop + bool l_continue_arm = false; + //break flag set by the retry loop to break on the outer loop + bool l_break = false; + errlHndl_t l_err_retry = nullptr; + + // Attempt arm multiple times in case of glitches + for (size_t l_retry = 0; l_retry <= ARM_MAX_RETRY_COUNT; l_retry++) + { + + l_err = NVDIMM::nvdimmChangeArmState(l_nvdimm, ARM_TRIGGER); + // If we run into any error here we will just + // commit the error log and move on. Let the + // system continue to boot and let the user + // salvage the data + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to trigger arm", get_huid(l_nvdimm)); + + nvdimmDisarm(i_nvdimmTargetList); + + // Committing the error as we don't want this to interrupt + // the boot. This will notify the user that action is needed + // on this module + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); + o_arm_successful = false; + + // Cause the main loop to skip the rest of the arm procedure + // and move to the next target + l_continue_arm = true; + break; + } + + // Arm happens one module at a time. 
No need to set any offset on the counter + uint32_t l_poll = 0; + l_err = nvdimmPollArmDone(l_nvdimm, l_poll); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] arm command timed out", get_huid(l_nvdimm)); + l_arm_timeout = true; + + l_err_t = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED); + if (l_err_t) + { + errlCommit( l_err_t, NVDIMM_COMP_ID ); + } + + // Committing the error as we don't want this to interrupt + // the boot. This will notify the user that action is needed + // on this module + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + + errlCommit( l_err, NVDIMM_COMP_ID ); + o_arm_successful = false; + } + + // Pass l_arm_timeout value in for health status check + l_continue = l_arm_timeout; + + // Sleep for 1 second before checking the health status + // to let the glitches settle in case there were any + nanosleep(1, 0); + + // Check health status registers and exit if required + l_err = nvdimmHealthStatusCheck( l_nvdimm, HEALTH_PRE_ARM, l_continue ); + + // Check for health status failure + // Any fail picked up by the health check is a legit fail + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed first health status check", get_huid(l_nvdimm)); + + // The arm timeout variable is used here as the continue variable for the + // health status check. 
This was done to include the timeout for use in the check + // If true either the arm timed out with a health status fail or the + // health status check failed with another disarm and exit condition + if (!l_continue) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + + // Disarming all dimms due to error + nvdimmDisarm(i_nvdimmTargetList); + o_arm_successful = false; + + // Cause the main loop to exit out of the main arm procedure + l_break = true; + break; + } + else + { + errlCommit( l_err, NVDIMM_COMP_ID ); + + // Cause the main loop to skip the rest of the arm procedure + // and move to the next target + l_continue_arm = true; + break; + } + } + + l_err = nvdimmCheckArmSuccess(l_nvdimm, l_arm_timeout); + + // At this point we have passed the health check. If the arm were + // to fail now, it is likely it was due to some glitch. Let's retry + // the arm again as long as the fail is not due to timeout. + // A timeout would mean a charging issue, it would have been caught + // by the health check. + l_is_retryable = !l_arm_timeout && l_retry < ARM_MAX_RETRY_COUNT; + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to succesfully arm. %s retryable.", + get_huid(l_nvdimm), l_is_retryable? "IS" : "NOT"); + + if (l_is_retryable) + { + // Save the original error + // If a previous error was saved then delete it + if (l_err_retry) + { + delete l_err_retry; + } + l_err_retry = l_err; + + /*@ + *@errortype + *@reasoncode NVDIMM_ARM_RETRY + *@severity ERRORLOG_SEV_INFORMATIONAL + *@moduleid NVDIMM_ARM_ERASE + *@userdata1[0:31] Target Huid + *@userdata1[32:39] l_is_retryable + *@userdata1[40:47] MAX arm retry count + *@userdata2[0:31] Original errlog plid + *@userdata2[32:63] Original errlog reason code + *@devdesc NVDIMM encountered a glitch causing the initial + * arm to fail. 
System firmware will retry the arm + *@custdesc NVDIMM requires an arm retry + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_INFORMATIONAL, + NVDIMM_ARM_ERASE, + NVDIMM_ARM_RETRY, + NVDIMM_SET_USER_DATA_1(TARGETING::get_huid(l_nvdimm), + FOUR_UINT8_TO_UINT32(l_is_retryable, ARM_MAX_RETRY_COUNT,0,0)), + TWO_UINT32_TO_UINT64(l_err_retry->plid(), l_err_retry->reasonCode()), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Callout the dimm + l_err->addHwCallout( l_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else + { + // Handle retryable error + if (l_err_retry) + { + ERRORLOG::ErrlUserDetailsString("Arm RETRY failed").addToLog(l_err_retry); + + // Delete the current errlog and use the original errlog for callout + delete l_err; + l_err = l_err_retry; + l_err_retry = nullptr; + } + + // Disarming all dimms due to error + nvdimmDisarm(i_nvdimmTargetList); + + l_err_t = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED); + if (l_err_t) + { + errlCommit( l_err_t, NVDIMM_COMP_ID ); + } + + // Committing the error as we don't want this to interrupt + // the boot. This will notify the user that action is needed + // on this module + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + + // Dump Traces for error logs + nvdimmTraceRegs( l_nvdimm, l_RegInfo ); + nvdimmAddPage4Regs(l_nvdimm,l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + + errlCommit(l_err, NVDIMM_COMP_ID); + o_arm_successful = false; + + // Cause the main loop to exit out of the main arm procedure + l_break = true; + break; + } + } + else + { + // Arm worked. 
Exit the retry loop + break; + } // close nvdimmCheckArmSuccess check + } // close arm retry loop + + if (l_continue_arm) + { + continue; + } + else if (l_break) + { + break; + } + + // After arming the trigger, erase the image to prevent the possible + // stale image getting the restored on the next boot in case of failed + // save. + l_err = nvdimmEraseNF(l_nvdimm); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to erase post arm", get_huid(l_nvdimm)); + + // Disarming all dimms due to error + nvdimmDisarm(i_nvdimmTargetList); + + // Committing the error as we don't want this to interrupt + // the boot. This will notify the user that action is needed + // on this module + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); + o_arm_successful = false; + break; + } + + // Arm successful, update armed status + l_err = NVDIMM::notifyNvdimmProtectionChange(l_nvdimm, + NVDIMM::NVDIMM_ARMED); + if (l_err) + { + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit(l_err, NVDIMM_COMP_ID); + } + + // Enable Persistency and Warning Threshold notifications + l_err = nvdimmWriteReg(l_nvdimm, SET_EVENT_NOTIFICATION_CMD, ENABLE_NOTIFICATIONS); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X] setting persistency notification", + TARGETING::get_huid(l_nvdimm)); + break; + } + + // Check notification status and errors + l_err = nvdimmReadReg(l_nvdimm, SET_EVENT_NOTIFICATION_STATUS, l_data); + if (l_err) + { + break; + } + else if (((l_data & SET_EVENT_NOTIFICATION_ERROR) == SET_EVENT_NOTIFICATION_ERROR) + || ((l_data & NOTIFICATIONS_ENABLED) != NOTIFICATIONS_ENABLED)) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to set event notification", + get_huid(l_nvdimm)); + + // Set NVDIMM Status flag to partial working, as error detected but data might persist + notifyNvdimmProtectionChange(l_nvdimm, 
NVDIMM_RISKY_HW_ERROR); + + /*@ + *@errortype + *@reasoncode NVDIMM_SET_EVENT_NOTIFICATION_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_SET_EVENT_NOTIFICATION + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM threw an error or failed to set event + * notifications during arming + *@custdesc NVDIMM failed to enable event notificaitons + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_SET_EVENT_NOTIFICATION, + NVDIMM_SET_EVENT_NOTIFICATION_ERROR, + TARGETING::get_huid(l_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Callout the dimm + l_err->addHwCallout( l_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Read relevant regs for trace data + nvdimmTraceRegs(l_nvdimm, l_RegInfo); + nvdimmAddPage4Regs(l_nvdimm,l_err); + nvdimmAddVendorLog(l_nvdimm, l_err); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + + errlCommit( l_err, NVDIMM_COMP_ID ); + + // We are after the arm step now, so on any error cases let's log it + // then move to the next nvdimm + continue; + } + + // Re-check health status registers + l_err = nvdimmHealthStatusCheck( l_nvdimm, HEALTH_POST_ARM, l_continue ); + + // Check for health status failure + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed final health status check", get_huid(l_nvdimm)); + + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; + } + + } + + // Check for uncommited i2c fail error logs + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, "nvdimmArm() failed an i2c read/write"); + errlCommit( l_err, NVDIMM_COMP_ID ); + nvdimmDisarm(i_nvdimmTargetList); + return false; + } + + // Unmask firs if the arm completed successfully + if (o_arm_successful) + { + // Unmask MBACALFIR EventN and set to recoverable + for (TargetHandleList::iterator it = i_nvdimmTargetList.begin(); + it != i_nvdimmTargetList.end();) + { + 
TargetHandleList l_mcaList; + getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA); + assert(l_mcaList.size(), "nvdimmArm() failed to find parent MCA."); + + // Set MBACALFIR_ACTION0 to recoverable + l_writeAddress = MBACALFIR_ACTION0_REG; + l_writeData = 0; + l_err = deviceRead(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + + l_writeData &= MBACALFIR_EVENTN_AND_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // Set MBACALFIR_ACTION1 to recoverable + l_writeAddress = MBACALFIR_ACTION1_REG; + l_writeData = 0; + l_err = deviceRead(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + l_writeData |= MBACALFIR_EVENTN_OR_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // Unmask MBACALFIR[8] + l_writeAddress = MBACALFIR_AND_MASK_REG; + l_writeData = MBACALFIR_UNMASK_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + it++; + } + + } + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmArm() returning %d", + o_arm_successful); + return o_arm_successful; +} + +bool nvdimmDisarm(TargetHandleList &i_nvdimmTargetList) +{ + bool 
o_disarm_successful = true; + + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmDisarm() %d", + i_nvdimmTargetList.size()); + + errlHndl_t l_err = nullptr; + + for (auto const l_nvdimm : i_nvdimmTargetList) + { + l_err = NVDIMM::nvdimmChangeArmState(l_nvdimm, DISARM_TRIGGER); + // If we run into any error here we will just + // commit the error log and move on. Let the + // system continue to boot and let the user + // salvage the data + if (l_err) + { + // Committing the error as we don't want this to interrupt + // the boot. This will notify the user that action is needed + // on this module + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit( l_err, NVDIMM_COMP_ID ); + o_disarm_successful = false; + continue; + } + + // Disarm successful, update armed status + l_err = NVDIMM::notifyNvdimmProtectionChange(l_nvdimm, + NVDIMM::NVDIMM_DISARMED); + if (l_err) + { + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit(l_err, NVDIMM_COMP_ID); + } + } + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmDisarm() returning %d", + o_disarm_successful); + + return o_disarm_successful; + +} + + +/* + * @brief Wrapper function to return NVDIMMs to factory default + */ +bool nvdimmFactoryDefault(TargetHandleList &i_nvdimmList) +{ + errlHndl_t l_err = nullptr; + bool l_success = true; + + // Factory default for all nvdimms in the list + for (const auto & l_nvdimm : i_nvdimmList) + { + l_err = nvdimm_factory_reset(l_nvdimm); + if (l_err) + { + l_success = false; + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; + } + + // Update nvdimm status + l_err = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + } + + return l_success; +} + + +/* + * @brief Function to start secure erase verify of NVDIMMs + */ +bool nvdimmSecureEraseVerifyStart(TargetHandleList &i_nvdimmList) +{ + errlHndl_t l_err = nullptr; + bool l_success = true; + + // 
Secure erase verify for all nvdimms in the list + for (const auto & l_nvdimm : i_nvdimmList) + { + // Clear the erase_verify_status reg + l_err = nvdimmWriteReg(l_nvdimm, + ERASE_VERIFY_STATUS, + ERASE_VERIFY_CLEAR); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmSecureEraseVerifyStart() HUID 0x%X" + "Failed to write ERASE_VERIFY_STATUS register", + get_huid(l_nvdimm)); + l_success = false; + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; + } + + // Start the erase verify operation + l_err = nvdimmWriteReg(l_nvdimm, + ERASE_VERIFY_CONTROL, + ERASE_VERIFY_START); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmSecureEraseVerifyStart() HUID 0x%X" + "Failed to write ERASE_VERIFY_CONTROL register", + get_huid(l_nvdimm)); + l_success = false; + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; + } + + // Call notify to clear NV_STATUS bit + l_err = notifyNvdimmProtectionChange(l_nvdimm, + ERASE_VERIFY_STARTED); + if (l_err) + { + l_success = false; + errlCommit(l_err, NVDIMM_COMP_ID); + continue; + } + } + + return l_success; +} + + +/* + * @brief Function to check status of secure erase verify of NVDIMMs + */ +bool nvdimmSecureEraseVerifyStatus(TargetHandleList &i_nvdimmList) +{ + errlHndl_t l_err = nullptr; + bool l_success = true; + uint8_t l_data = 0; + + // Check secure erase verify status for all nvdimms in the list + for (const auto & l_nvdimm : i_nvdimmList) + { + // Check if secure-erase-verify is already complete for this nvdimm + ATTR_NV_STATUS_FLAG_type l_nv_status = + l_nvdimm->getAttr<ATTR_NV_STATUS_FLAG>(); + if (l_nv_status & NV_STATUS_ERASE_VERIFY_SET) + { + continue; + } + + l_err = nvdimmReadReg(l_nvdimm, ERASE_VERIFY_CONTROL, l_data); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmSecureEraseVerifyStatus() HUID 0x%X" + "Failed to read ERASE_VERIFY_CONTROL register", + get_huid(l_nvdimm)); + l_success = false; + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; // Continue to next nvdimm + } + + // If trigger 
is set the operation is not yet complete + if (l_data & ERASE_VERIFY_TRIGGER) + { + continue; // Continue to next nvdimm + } + + // Secure erase verify on this nvdimm is complete + // Call notify to set NV_STATUS bit + l_err = notifyNvdimmProtectionChange(l_nvdimm, + ERASE_VERIFY_COMPLETED); + if (l_err) + { + l_success = false; + errlCommit(l_err, NVDIMM_COMP_ID); + } + + + // Check the status register + l_err = nvdimmReadReg(l_nvdimm, ERASE_VERIFY_STATUS, l_data); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmSecureEraseVerifyStatus() HUID 0x%X" + "Failed to read ERASE_VERIFY_STATUS register", + get_huid(l_nvdimm)); + l_success = false; + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; // Continue to next nvdimm + } + + // Non-zero status is an error + if (l_data) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmSecureEraseVerifyStatus() " + "HUID 0x%X ERASE_VERIFY_STATUS returned non-zero status", + get_huid(l_nvdimm)); + /*@ + *@errortype + *@reasoncode NVDIMM_ERASE_VERIFY_STATUS_NONZERO + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_SECURE_ERASE_VERIFY_STATUS + *@userdata1 NVDIMM HUID + *@userdata2 ERASE_VERIFY_STATUS + *@devdesc Error detected during secure erase verify + *@custdesc NVDIMM erase error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_SECURE_ERASE_VERIFY_STATUS, + NVDIMM_ERASE_VERIFY_STATUS_NONZERO, + get_huid(l_nvdimm), + l_data, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME); + l_err->addPartCallout( l_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddVendorLog(l_nvdimm, l_err); + errlCommit( l_err, NVDIMM_COMP_ID ); + l_success = false; + continue; // Continue to next nvdimm + } + + + // Check the result registers + uint16_t l_result = 0; + l_err = nvdimmReadReg(l_nvdimm, ERASE_VERIFY_RESULT_MSB, l_data); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmSecureEraseVerifyStatus() HUID 0x%X" + "Failed to read 
ERASE_VERIFY_RESULT_MSB register", + get_huid(l_nvdimm)); + l_success = false; + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; // Continue to next nvdimm + } + + // Save result + l_result = l_data << 8; + + l_err = nvdimmReadReg(l_nvdimm, ERASE_VERIFY_RESULT_LSB, l_data); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK + "nvdimmSecureEraseVerifyStatus() HUID 0x%X" + "Failed to read ERASE_VERIFY_RESULT_LSB register", + get_huid(l_nvdimm)); + l_success = false; + errlCommit( l_err, NVDIMM_COMP_ID ); + continue; // Continue to next nvdimm + } + + // Save result + l_result |= l_data; + + // Non-zero result is an error + if (l_result) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmSecureEraseVerifyStatus() " + "HUID 0x%X ERASE_VERIFY_RESULT returned non-zero data", + get_huid(l_nvdimm)); + /*@ + *@errortype + *@reasoncode NVDIMM_ERASE_VERIFY_RESULT_NONZERO + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_SECURE_ERASE_VERIFY_STATUS + *@userdata1 NVDIMM HUID + *@userdata2 ERASE_VERIFY_RESULT + *@devdesc Error detected during secure erase verify + *@custdesc NVDIMM erase error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_SECURE_ERASE_VERIFY_STATUS, + NVDIMM_ERASE_VERIFY_RESULT_NONZERO, + get_huid(l_nvdimm), + l_result, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME); + l_err->addPartCallout( l_nvdimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddVendorLog(l_nvdimm, l_err); + errlCommit( l_err, NVDIMM_COMP_ID ); + l_success = false; + continue; // Continue to next nvdimm + } + + } + + return l_success; +} + + } // end NVDIMM namespace diff --git a/src/usr/isteps/nvdimm/nvdimm.H b/src/usr/isteps/nvdimm/nvdimm.H index 4d97a9c66..e66e42470 100644 --- a/src/usr/isteps/nvdimm/nvdimm.H +++ b/src/usr/isteps/nvdimm/nvdimm.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2019 */ +/* Contributors Listed Below - 
COPYRIGHT 2014,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -27,6 +27,7 @@ #define NVDIMM_H__ #include <usr/errl/errlentry.H> +#include <targeting/common/target.H> #include <targeting/common/commontargeting.H> #include <targeting/common/util.H> #include <targeting/common/utilFilter.H> @@ -40,6 +41,12 @@ extern trace_desc_t* g_trac_nvdimm; namespace NVDIMM { +#define NVDIMM_SET_USER_DATA_1(left_32_ops_id, right_32_huid) \ + TWO_UINT32_TO_UINT64(left_32_ops_id, right_32_huid) + +#define NVDIMM_SET_USER_DATA_2_TIMEOUT(left_32_polled, right_32_timeout) \ + NVDIMM_SET_USER_DATA_1(left_32_polled, right_32_timeout) + // I2C registers for page 0-3, extracted from JEDEC BAEBI spec // Refer to BAEBI spec for details @@ -121,11 +128,13 @@ enum i2cReg : uint16_t SET_ES_POLICY_STATUS = 0x070, FIRMWARE_OPS_STATUS = 0x071, OPERATIONAL_UNIT_OPS_STATUS = 0x072, - RESTORE_FAIL_INFO = 0x088, - OPERATIONAL_UNIT_FAIL_INFO = 0x08F, + ERASE_FAIL_INFO = 0x073, + ARM_FAIL_INFO = 0x076, CSAVE_INFO = 0x080, CSAVE_FAIL_INFO0 = 0x084, CSAVE_FAIL_INFO1 = 0x085, + RESTORE_FAIL_INFO = 0x088, + OPERATIONAL_UNIT_FAIL_INFO = 0x08F, NVM_LIFETIME_ERROR_THRESHOLD = 0x090, ES_LIFETIME_ERROR_THRESHOLD = 0x091, ES_TEMP_ERROR_HIGH_THRESHOLD0 = 0x094, @@ -274,6 +283,45 @@ enum i2cReg : uint16_t TYPED_BLOCK_DATA_BYTE30 = 0x39E, TYPED_BLOCK_DATA_BYTE31 = 0x39F, TYPED_BLOCK_DATA_OFFSET = 0x3E0, + PANIC_CNT = 0x406, + STATUS_EVENT_INT_INFO1 = 0x40A, + STATUS_EVENT_INT_INFO2 = 0x40B, + FLASH_BAD_BLK_PCT = 0x41D, // Read only; Percentage of flash blocks + // in the flash array marked as bad blocks + PARITY_ERROR_COUNT = 0x423, + FLASH_ERROR_COUNT0 = 0x428, // Read only; LSB[7:0] Flash error count + FLASH_ERROR_COUNT1 = 0x429, // Read only; [15:8] + FLASH_ERROR_COUNT2 = 0x42A, // Read only; MSB[23:16] + FLASH_BAD_BLOCK_COUNT0 = 0x42B, + FLASH_BAD_BLOCK_COUNT1 = 0x42C, + BPM_MAGIC_REG1 = 0x430, + BPM_MAGIC_REG2 = 0x431, + SCAP_STATUS = 0x432, + SCAP_REG = 0x434, + SCAP_DATA = 0x435, + 
I2C_REG_PROTECT = 0x43D, + BPM_REG_CMD = 0x440, + BPM_CMD_STATUS = 0x441, + BPM_PAYLOAD_LENGTH = 0x442, + BPM_REG_ERR_STATUS = 0x443, + BPM_REG_PAYLOAD_START = 0x444, + ERASE_VERIFY_CONTROL = 0x51A, + ERASE_VERIFY_STATUS = 0x51B, + ERASE_VERIFY_RESULT_LSB = 0x51C, + ERASE_VERIFY_RESULT_MSB = 0x51D, + ERASE_VERIFY_TEST = 0x51E, + ENCRYPTION_COMMAND = 0x51F, + ENCRYPTION_CONFIG_STATUS = 0x520, + ENCRYPTION_ACCESS_KEY_SET = 0x521, + ENCRYPTION_ACCESS_KEY_VERIFY = 0x522, + ENCRYPTION_ACCESS_KEY_UNLOCK = 0x523, + ENCRYPTION_RAMDOM_STRING_SET = 0x524, + ENCRYPTION_RANDOM_STRING_VERIFY = 0x525, + ENCRYPTION_ERASE_KEY_SET = 0x526, + ENCRYPTION_ERASE_KEY_VERIFY = 0x527, + ENCRYPTION_ERASE_KEY_TEST = 0x528, + ENCRYPTION_ERASE_KEY_TEST_VERIFY = 0x529, + ENCRYPTION_KEY_VALIDATION = 0x52A, }; // i2cReg macros @@ -292,6 +340,7 @@ enum page : uint8_t TWO = 0x02, THREE = 0x03, FOUR = 0x04, + FIVE = 0x05, }; // Enums for inputs/expected output to/from the i2c registers @@ -306,6 +355,7 @@ enum i2c_in_values : uint8_t RESET_CTRLR = 0x01, VALID_IMAGE = 0x01, RESET_CONTROLLER = 0x01, + FACTORY_DEFAULT = 0x01, }; enum i2c_out_values : uint8_t @@ -317,11 +367,20 @@ enum i2c_out_values : uint8_t CHARGE_IN_PROGRESS = 0x01, SAVE_SUCCESS = 0x01, RSTR_SUCCESS = 0X01, - ARM_SUCCESS = 0X09, + ARM_SUCCESS = 0X01, ERASE_SUCCESS = 0X01, ES_SUCCESS = 0x05, CHARGE_SUCCESS = 0x00, NV_READY = 0xA5, + FACTORY_RESET_IN_PROGRESS = 0x03, + NO_RESET_N = 0x20, + RESET_N_ARMED = 0x08, + ES_POLICY_ERROR = 0x02, + ARM_ERROR = 0X02, + RSTR_ERROR = 0x02, + SAVE_ERROR = 0x02, + ERASE_ERROR = 0x02, + CLEAR_ALL_STATUS = 0x3C, //Clears CAVE, RESTORE, ERASE, and ARM status regs }; // Timeout-related enum @@ -330,6 +389,7 @@ enum timeout : uint32_t OPS_POLL_TIME_MS = 5000, NV_READY_POLL_TIME_MS = 1000, PAGE_SWITCH_POLL_TIME_NS = 100, + KEY_WRITE_DELAY_MS = 100, }; // Assign an id to each of the 6 major ops @@ -354,6 +414,119 @@ enum misc }; /** + * @brief Encryption key data + */ +static constexpr size_t ENC_KEY_SIZE 
= 32; +struct nvdimmKeyData_t +{ + uint8_t rs[ENC_KEY_SIZE]; // Random String (RS) + uint8_t ek[ENC_KEY_SIZE]; // Erase Key (EK) + uint8_t ak[ENC_KEY_SIZE]; // Access Key (AK) +}; + +struct scap_status_bits +{ + uint8_t Reserved1 : 1; // Bit 7 + uint8_t Bpm_Bsl_Mode : 1; // Bit 6 + uint8_t Reserved2 : 1; // Bit 5 + uint8_t Present : 1; // Bit 4 + uint8_t Delay : 1; // Bit 3 + uint8_t Error : 1; // Bit 2 + uint8_t Busy : 1; // Bit 1 + uint8_t Enable : 1; // Bit 0 +} PACKED; + +/** + * @brief Union simplifying manipulation of SCAP_STATUS bits + */ +union scap_status_union +{ + uint8_t full; + scap_status_bits bit; + + /** + * @brief Constructor + */ + scap_status_union() + : full(0) + {} +} PACKED; + +typedef scap_status_union scap_status_register_t; + +// Bits in Health Status Check Registers +enum health_status : uint8_t +{ + // Module Health Status0 + VOLTAGE_REGULATOR_FAILED = 0x01, + VDD_LOST = 0x02, + VPP_LOST = 0x04, + VTT_LOST = 0x08, + DRAM_NOT_SELF_REFRESH = 0x10, + CONTROLLER_HARDWARE_ERROR = 0x20, + NVM_CONTROLLER_ERROR = 0x40, + NVM_LIFETIME_ERROR = 0x80, + // Module Health Status1 + NOT_ENOUGH_ENERGY_FOR_CSAVE = 0x01, + INVALID_FIRMWARE_ERROR = 0x02, + CONFIG_DATA_ERROR = 0x04, + NO_ES_PRESENT = 0x08, + ES_POLICY_NOT_SET = 0x10, + ES_HARDWARE_FAILURE = 0x20, + ES_HEALTH_ASSESSMENT_ERROR = 0x40, + // Error Threshold Status + ES_LIFETIME_ERROR = 0x02, + ES_TEMP_ERROR = 0x04, +}; + +// Int representation for health status function call +enum health_function : uint8_t +{ + HEALTH_SAVE = 0x01, + HEALTH_RESTORE = 0x02, + HEALTH_UPDATE = 0x03, + HEALTH_PRE_ARM = 0x04, + HEALTH_POST_ARM = 0x05, +}; + +// Event notification register values +enum event_n : uint8_t +{ + PERSISTENCY_NOTIFICATION = 0x01, + SET_EVENT_NOTIFICATION_ERROR = 0x02, + WARNING_THRESHOLD_NOTIFICATION = 0x02, + PERSISTENCY_ENABLED = 0x04, + WARNING_THRESHOLD_ENABLED = 0x08, + ENABLE_NOTIFICATIONS = 0x03, + NOTIFICATIONS_ENABLED = 0x0C, +}; + +// MBACALFIR register addresses +enum 
mbacal_addresses : uint32_t +{ + MBACALFIR_AND_MASK_REG = 0x07010904, + MBACALFIR_OR_MASK_REG = 0x07010905, + MBACALFIR_ACTION0_REG = 0x07010906, + MBACALFIR_ACTION1_REG = 0x07010907, +}; + +// MBACALFIR bit masks for event n +enum mbacal_bitmask_values : uint64_t +{ + MBACALFIR_EVENTN_AND_BIT = 0xff7fffffffffffff, + MBACALFIR_EVENTN_OR_BIT = 0x0080000000000000, + MBACALFIR_UNMASK_BIT = 0xff7fffffffffffff, +}; + + +/** + * @brief Mask MCBACALFIR Event N to prevent PRD from handling event + * + * @param[in] - i_nvdimm - nvdimm target for operation on its parent MCA + */ +void maskMbacalfir_eventn(TARGETING::Target* i_nvdimm); + +/** * @brief Wrapper to call deviceOp to read the NV controller via I2C * * @param[in] i_nvdimm - nvdimm target with NV controller @@ -434,6 +607,169 @@ errlHndl_t nvdimmPollStatus(TARGETING::Target *i_nvdimm, ops_id i_ops_id, uint32 * the error log. */ errlHndl_t nvdimmSetESPolicy(TARGETING::Target* i_nvdimm); + +/** + * @brief Helper function to handle conflicting attribute keys + * + * @param[in] i_attrKeysFw - firmware key attribute + * + * @param[in] i_attrKeysAnchor - anchor key attribute + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log + */ +errlHndl_t nvdimm_handleConflictingKeys( + TARGETING::ATTR_NVDIMM_ENCRYPTION_KEYS_FW_typeStdArr& i_attrKeysFw, + TARGETING::ATTR_NVDIMM_ENCRYPTION_KEYS_ANCHOR_typeStdArr& i_attrKeysAnchor); + + +/** + * @brief Helper function to validate attribute keys + * + * @param[in] i_attrData - pointer to attribute key data + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log + */ +errlHndl_t nvdimm_checkValidAttrKeys( nvdimmKeyData_t* i_attrData ); + + +/** + * @brief Helper function to write encryption key regs (RS/EK/AK) + * + * @param[in] i_nvdimm - nvdimm target + * + * @param[in] i_keyData - data to write to the key reg + * + * @param[in] i_keyReg - enum register to write key + * + * @param[in] i_verifyReg - enum register to 
verify key written + * + * @param[in] i_secondAttempt - normally false, true if verif check failed + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log + */ +errlHndl_t nvdimm_setKeyReg(TARGETING::Target* i_nvdimm, + uint8_t* i_keyData, + uint32_t i_keyReg, + uint32_t i_verifyReg, + bool i_secondAttempt); + + +/** + * @brief Helper function to generate randon number for encryption keys + * Generates ENC_KEY_SIZE bytes of data + * Different implementations for boot vs runtime + * + * @param[out] o_genData - pointer to generated data + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log + */ +errlHndl_t nvdimm_getRandom(uint8_t* o_genData); + + +/** + * @brief Helper function to make a random number valid for keys + * Keys must not contain 0x00 or 0xFF + * - 0x00 KEY_TERMINATE_BYTE terminates a key < 32 bytes + * - 0xFF KEY_ABORT_BYTE aborts the key reg write process + * This function finds invalid bytes in the first random number + * and replaces with bytes from the second random number + * + * @param[out] o_genData - pointer to final generated data + * + * @param[in] i_xtraData - pointer to extra generated data + * + * @return - false if successful, true if failed + * + */ +bool nvdimm_keyifyRandomNumber(uint8_t* o_genData, uint8_t* i_xtraData); + + +/** + * @brief Helper function to validate a random number + * + * @param[in] i_genData - pointer to generated data + * + * @return - true if valid, false if invalid + * + */ +bool nvdimm_validRandomNumber(uint8_t* i_genData); + + +/** + * @brief Helper function to set encryption error + * in ATTR_NVDIMM_ARMED + * + * @param[in] i_nvdimm - nvdimm target + * + */ +void nvdimmSetEncryptionError(TARGETING::Target *i_nvdimm); + + +/** + * @brief Helper function to reset the NVDIMM controller + * + * @param[in] i_nvdimm - nvdimm target + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log + */ +errlHndl_t 
nvdimmResetController(TARGETING::Target *i_nvdimm); + + +/** + * @brief Helper function to factory reset NVDIMM + * + * @param[in] i_nvdimm - nvdimm target + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log + */ +errlHndl_t nvdimm_factory_reset(TARGETING::Target *i_nvdimm); + + +#ifndef __HOSTBOOT_RUNTIME + +/** + * @brief Helper function to get TPM pointer for random number generation + * + * @param[out] - pointer to a functional TPM or nullptr if no TPM found + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log + */ +errlHndl_t nvdimm_getTPM(TARGETING::Target*& o_tpm); + +#endif + +/** + * @brief This function checks for valid image on the given target + * + * @param[in] i_nvdimm - nvdimm target with NV controller + * + * @param[out] o_imgValid - return true if the target has a valid image + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log. + */ +errlHndl_t nvdimmValidImage(TARGETING::Target *i_nvdimm, bool &o_imgValid); + + +/** + * @brief This function grabs the current slot NVDIMM code is running + * Slot 0 is the failure slot, Slot 1 is the updateable slot + * + * @param[in] i_nvdimm - nvdimm target with NV controller + * @param[out] o_slot - 0 or 1 + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log. + */ +errlHndl_t nvdimmGetRunningSlot(TARGETING::Target *i_nvdimm, uint8_t & o_slot); + } //End NVDIMM namespace diff --git a/src/usr/isteps/nvdimm/nvdimm.mk b/src/usr/isteps/nvdimm/nvdimm.mk index 397b27814..d9418b414 100644 --- a/src/usr/isteps/nvdimm/nvdimm.mk +++ b/src/usr/isteps/nvdimm/nvdimm.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2018 +# Contributors Listed Below - COPYRIGHT 2018,2019 # [+] International Business Machines Corp. 
# # @@ -47,11 +47,14 @@ EXTRAINCDIR += ${PROCEDURE_PATH}/hwp/ffdc/ OBJS += nvdimm.o OBJS += nvdimmdd.o OBJS += errlud_nvdimm.o +OBJS += nvdimmErrorLog.o ifneq (${HOSTBOOT_RUNTIME},1) # code update path for NVDIMMs (not at RUNTIME) OBJS += nvdimm_update.o +# code update path for BPMs (not at runtime) +OBJS += bpm_update.o endif diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.C b/src/usr/isteps/nvdimm/nvdimmErrorLog.C new file mode 100644 index 000000000..9fbd27d14 --- /dev/null +++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.C @@ -0,0 +1,1317 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/isteps/nvdimm/nvdimmErrorLog.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2014,2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + +#include "nvdimm.H" +#include <trace/interface.H> +#include <errl/errlentry.H> +#include <errl/errlmanager.H> +#include <errl/errludtarget.H> +#include <targeting/common/commontargeting.H> +#include <targeting/common/util.H> +#include <targeting/common/utilFilter.H> +#include <fapi2.H> +#include <isteps/nvdimm/nvdimmreasoncodes.H> +#include <isteps/nvdimm/nvdimm.H> +#include "errlud_nvdimm.H" + +using namespace TARGETING; + +namespace NVDIMM +{ + +/** + * @brief Read and save various status registers needed for error log traces + * + * @param[in] i_nvdimm - nvdimm target + * + * @param[out] o_RegInfo - struct to hold register data + * + */ +void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo) +{ + uint8_t l_data = 0x0; + errlHndl_t l_err = nullptr; + + // Read MODULE HEALTH register + l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Module_Health = l_data; + + // Read MODULE HEALTH STATUS0 register + l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH_STATUS0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Module_Health_Status0 = l_data; + + // Read MODULE HEALTH STATUS1 register + l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH_STATUS1, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Module_Health_Status1 = l_data; + + // Read CSAVE STATUS register + l_err = nvdimmReadReg(i_nvdimm, CSAVE_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.CSave_Status = l_data; + + // Read CSAVE INFO register + l_err = nvdimmReadReg(i_nvdimm, CSAVE_INFO, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.CSave_Info = l_data; + + // Read CSAVE FAIL INFO0 register + l_err = nvdimmReadReg(i_nvdimm, CSAVE_FAIL_INFO0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.CSave_Fail_Info0 = l_data; + + // Read CSAVE 
FAIL INFO1 register + l_err = nvdimmReadReg(i_nvdimm, CSAVE_FAIL_INFO1, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.CSave_Fail_Info1 = l_data; + + // Read CSAVE TIMEOUT0 register + l_err = nvdimmReadReg(i_nvdimm, CSAVE_TIMEOUT0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.CSave_Timeout0 = l_data; + + // Read CSAVE TIMEOUT1 register + l_err = nvdimmReadReg(i_nvdimm, CSAVE_TIMEOUT1, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.CSave_Timeout1 = l_data; + + // Read ERROR THRESHOLD STATUS register + l_err = nvdimmReadReg(i_nvdimm, ERROR_THRESHOLD_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Error_Threshold_Status = l_data; + + // Read NVDIMM READY register + l_err = nvdimmReadReg(i_nvdimm, NVDIMM_READY, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.NVDimm_Ready = l_data; + + // Read NVDIMM CMD STATUS0 register + l_err = nvdimmReadReg(i_nvdimm, NVDIMM_CMD_STATUS0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.NVDimm_CMD_Status0 = l_data; + + // Read ERASE STATUS register + l_err = nvdimmReadReg(i_nvdimm, ERASE_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Erase_Status = l_data; + + // Read ERASE FAIL INFO register + l_err = nvdimmReadReg(i_nvdimm, ERASE_FAIL_INFO, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Erase_Fail_Info = l_data; + + // Read ERASE TIMEOUT0 register + l_err = nvdimmReadReg(i_nvdimm, ERASE_TIMEOUT0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Erase_Timeout0 = l_data; + + // Read ERASE TIMEOUT1 register + l_err = nvdimmReadReg(i_nvdimm, ERASE_TIMEOUT1, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Erase_Timeout1 = l_data; + + // Read ABORT CMD TIMEOUT register + l_err = nvdimmReadReg(i_nvdimm, 
ABORT_CMD_TIMEOUT, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Abort_CMD_Timeout = l_data; + + // Read SET ES POLICY STATUS register + l_err = nvdimmReadReg(i_nvdimm, SET_ES_POLICY_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Set_ES_Policy_Status = l_data; + + // Read RESTORE STATUS register + l_err = nvdimmReadReg(i_nvdimm, RESTORE_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Restore_Status = l_data; + + // Read RESTORE FAIL INFO register + l_err = nvdimmReadReg(i_nvdimm, RESTORE_FAIL_INFO, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Restore_Fail_Info = l_data; + + // Read RESTORE TIMEOUT0 register + l_err = nvdimmReadReg(i_nvdimm, RESTORE_TIMEOUT0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Restore_Timeout0 = l_data; + + // Read RESTORE TIMEOUT1 register + l_err = nvdimmReadReg(i_nvdimm, RESTORE_TIMEOUT1, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Restore_Timeout1 = l_data; + + // Read ARM STATUS register + l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Arm_Status = l_data; + + // Read ARM FAIL INFO register + l_err = nvdimmReadReg(i_nvdimm, ARM_FAIL_INFO, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Arm_Fail_Info = l_data; + + // Read ARM TIMEOUT0 register + l_err = nvdimmReadReg(i_nvdimm, ARM_TIMEOUT0, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Arm_Timeout0 = l_data; + + // Read ARM TIMEOUT1 register + l_err = nvdimmReadReg(i_nvdimm, ARM_TIMEOUT1, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Arm_Timeout1 = l_data; + + // Read SET EVENT NOTIFICATION STATUS register + l_err = nvdimmReadReg(i_nvdimm, SET_EVENT_NOTIFICATION_STATUS, l_data); + if(l_err) + { 
+ errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Set_Event_Notification_Status = l_data; + + // Read NVDIMM Encryption Configuration and Status Register for Security Errors + l_err = nvdimmReadReg(i_nvdimm, ENCRYPTION_CONFIG_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Encryption_Config_Status = l_data; +} + +/** + * @brief Helper function for standard callout of an NVDIMM + * + * @param[in] i_nvdimm - nvdimm target + * + * @param[in] i_step - the nvdimm function calling the health check + * + * @param[out] o_err - error log handler to be modified + * + * @return bool - true to commit log and continue, false to return + * the error log to caller and exit. + */ +bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) +{ + bool l_continue = true; + uint8_t l_data; + errlHndl_t l_err = nullptr; + + // Check which callout check is necessary + switch(i_step) + { + // Post save errors always continue with callouts + case HEALTH_SAVE: + { + // Check to see if the nvdimm image is still valid + l_err = nvdimmValidImage(i_nvdimm, l_continue); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // Checkout image validity and set dimm status accordingly + if(l_continue) + { + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + } + else + { + // Callout, deconfig and gard the dimm + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + } + + break; + } + + // Post restore errors always continue with callouts + case HEALTH_RESTORE: + { + // Check restore status + l_err = nvdimmReadReg(i_nvdimm, RESTORE_STATUS, l_data); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else if ((l_data & RSTR_SUCCESS) != RSTR_SUCCESS) + { + 
l_continue = false; + } + + // Check restore status and set dimm status accordingly + if(l_continue) + { + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + } + else + { + // Callout, deconfig and gard the dimm + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + } + + break; + } + + // Post ARM errors need check for arm success + case HEALTH_PRE_ARM: + { + + // Check arm status + l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED)) + { + l_continue = false; + } + + // Check arm status and set dimm status accordingly + if(l_continue) + { + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); + + // Callout dimm without deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + } + else + { + // Set ATTR_NV_STATUS_FLAG to dimm diarmed + l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // Callout and gard the dimm + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::NO_DECONFIG, + HWAS::GARD_Fatal); + } + + break; + } + + // Post ARM errors need check for arm success + case HEALTH_POST_ARM: + { + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Set ATTR_NV_STATUS_FLAG to partially working as data may persist despite errors + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); + + break; + } + + 
} + + return l_continue; +} + +/** + * @brief Helper function for BPM/Cable high, NVDIMM low callout + * + * @param[in] i_nvdimm - nvdimm target + * + * @param[in] i_step - the nvdimm function calling the health check + * + * @param[in,out] o_err - error log handle that the callouts are added to + * + * @return bool - true to commit log and continue, false to return + * the error log to caller and exit. + */ +bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) +{ + bool l_continue = true; + uint8_t l_data; + errlHndl_t l_err = nullptr; + + // Check which callout check is necessary + switch(i_step) + { + // Post save errors need check for a still-valid image + case HEALTH_SAVE: + { + // Check to see if the nvdimm image is still valid + l_err = nvdimmValidImage(i_nvdimm, l_continue); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // Callout BPM and Cable but cannot deconfig or gard + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_CABLE_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + // Check image validity and set dimm status accordingly + if(l_continue) + { + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + } + else + { + // Callout dimm, deconfig and gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::DECONFIG, + HWAS::GARD_Fatal); + } + + break; + } + + // Post restore errors need check for restore success + case HEALTH_RESTORE: + { + // Check restore status + l_err = nvdimmReadReg(i_nvdimm, RESTORE_STATUS, l_data); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else if ((l_data & RSTR_SUCCESS) != RSTR_SUCCESS) + { + l_continue = false; + } + + // Callout BPM and Cable but cannot deconfig or gard + 
o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_CABLE_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Check restore status and set dimm status accordingly + if(l_continue) + { + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + } + + break; + } + + // Pre ARM errors need check for arm success + case HEALTH_PRE_ARM: + { + // Check arm status + l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED)) + { + l_continue = false; + } + + // Callout BPM and Cable but cannot deconfig or gard + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_CABLE_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + // Check arm status and set dimm status accordingly + if(l_continue) + { + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); + } + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + break; + } + + // Post ARM errors always continue with callouts + case HEALTH_POST_ARM: + { + // Callout BPM, Cable and dimm but do not deconfig or gard + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_CABLE_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + 
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); + + break; + } + + } + + return l_continue; +} + +/** + * @brief Helper function for BPM high, NVDIMM low callout + * + * @param[in] i_nvdimm - nvdimm target + * + * @param[in] i_step - the nvdimm function calling the health check + * + * @param[in,out] o_err - error log handle that the callouts are added to + * + * @return bool - true to commit log and continue, false to return + * the error log to caller and exit. + */ +bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) +{ + bool l_continue = true; + uint8_t l_data; + errlHndl_t l_err = nullptr; + + // Check which callout check is necessary + switch(i_step) + { + // Post save errors always continue with callouts + case HEALTH_SAVE: + { + // Callout BPM on high + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + + break; + } + + // Post restore errors always continue with callouts + case HEALTH_RESTORE: + { + // Callout BPM on high + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + + break; + } + + // Pre ARM errors need check for arm success + case HEALTH_PRE_ARM: + { + // Check arm status + l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED)) + { + l_continue = false; + } 
+ + // Callout BPM on high + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Check arm status and set dimm status accordingly + if(l_continue) + { + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); + } + else + { + // Set ATTR_NV_STATUS_FLAG to dimm disarmed + l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED); + if (l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + } + + break; + } + + // Post ARM errors always continue with callouts + case HEALTH_POST_ARM: + { + // Callout BPM on high + o_err->addPartCallout( i_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); + + break; + } + + } + + return l_continue; +} + +/** + * @brief Function checking the Health Status Registers for an nvdimm + * + * @param[in] i_nvdimm - nvdimm target + * + * @param[in] i_step - the nvdimm step calling the check + * + * @param[in,out] o_continue - in: for HEALTH_PRE_ARM, read as the arm-timeout flag; out: false signals a return to caller fail + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log. 
+ */ +errlHndl_t nvdimmHealthStatusCheck(Target *i_nvdimm, uint8_t i_step, bool& o_continue) +{ + uint8_t l_data = 0x0; + errlHndl_t l_err = nullptr; + errlHndl_t l_err_t = nullptr; + nvdimm_reg_t l_RegInfo; + bool l_arm_timeout = false; + + if (i_step == HEALTH_PRE_ARM) + { + l_arm_timeout = o_continue; // o_continue is read as an input here (presumably arm timed out - confirm with caller) + } + + // Collect Register data for parsing and traces + nvdimmTraceRegs(i_nvdimm, l_RegInfo); + + // Read SET_EVENT_NOTIFICATION_STATUS register (NOTE(review): here and in the reads below, after a failed read the current l_data is still stored) + l_err = nvdimmReadReg(i_nvdimm, SET_EVENT_NOTIFICATION_STATUS, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + l_RegInfo.Set_Event_Notification_Status = l_data; + + // Read RESTORE_STATUS register + l_err = nvdimmReadReg(i_nvdimm, RESTORE_STATUS , l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + l_RegInfo.Restore_Status = l_data; + + // Read RESTORE_FAIL_INFO register + l_err = nvdimmReadReg(i_nvdimm, RESTORE_FAIL_INFO , l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + l_RegInfo.Restore_Fail_Info = l_data; + + // Read NVDIMM_CMD_STATUS0 register + l_err = nvdimmReadReg(i_nvdimm, NVDIMM_CMD_STATUS0 , l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + l_RegInfo.NVDimm_CMD_Status0 = l_data; + + // Read ARM_STATUS register + l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS , l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + l_RegInfo.Arm_Status = l_data; + + // Read SET_ES_POLICY_STATUS register + l_err = nvdimmReadReg(i_nvdimm, SET_ES_POLICY_STATUS , l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + l_RegInfo.Set_ES_Policy_Status = l_data; + + // Check all nvdimm deconfig cases + do + { + // Check MODULE_HEALTH_STATUS0[0] + if ((l_RegInfo.Module_Health_Status0 & VOLTAGE_REGULATOR_FAILED) == VOLTAGE_REGULATOR_FAILED) + { + /*@ + *@errortype + *@reasoncode NVDIMM_VOLTAGE_REGULATOR_FAILED + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + 
*@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * voltage regulator failure + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_VOLTAGE_REGULATOR_FAILED, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS0[1] + if ((l_RegInfo.Module_Health_Status0 & VDD_LOST) == VDD_LOST) + { + /*@ + *@errortype + *@reasoncode NVDIMM_VDD_LOST + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * vdd loss + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_VDD_LOST, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS0[2] + if ((l_RegInfo.Module_Health_Status0 & VPP_LOST) == VPP_LOST) + { + /*@ + *@errortype + *@reasoncode NVDIMM_VPP_LOST + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * vpp loss + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_VPP_LOST, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS0[3] + if ((l_RegInfo.Module_Health_Status0 & VTT_LOST) == VTT_LOST) + { + /*@ + *@errortype + *@reasoncode NVDIMM_VTT_LOST + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc 
NVDIMM failed module health status check due to + * vtt loss + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_VTT_LOST, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS0[4] + if ((l_RegInfo.Module_Health_Status0 & DRAM_NOT_SELF_REFRESH) == DRAM_NOT_SELF_REFRESH) + { + /*@ + *@errortype + *@reasoncode NVDIMM_DRAM_NOT_SELF_REFRESH + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * no self refresh on the nvdimm + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_DRAM_NOT_SELF_REFRESH, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS0[5] + if ((l_RegInfo.Module_Health_Status0 & CONTROLLER_HARDWARE_ERROR) == CONTROLLER_HARDWARE_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_CONTROLLER_HARDWARE_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * error with the hardware controller + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_CONTROLLER_HARDWARE_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS0[6] + if ((l_RegInfo.Module_Health_Status0 & NVM_CONTROLLER_ERROR) == NVM_CONTROLLER_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_NVM_CONTROLLER_ERROR + *@severity 
ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * error with the nvdimm controller + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_NVM_CONTROLLER_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + + // Check MODULE_HEALTH_STATUS0[7] + if ((l_RegInfo.Module_Health_Status0 & NVM_LIFETIME_ERROR) == NVM_LIFETIME_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_NVM_LIFETIME_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * an nvdimm lifetime error + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_NVM_LIFETIME_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS1[1] + if ((l_RegInfo.Module_Health_Status1 & INVALID_FIRMWARE_ERROR) == INVALID_FIRMWARE_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_INVALID_FIRMWARE_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * an invalid firmware image + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_INVALID_FIRMWARE_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS1[2] + if 
((l_RegInfo.Module_Health_Status1 & CONFIG_DATA_ERROR) == CONFIG_DATA_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_CONFIG_DATA_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * invalid configuration data + *@custdesc NVDIMM failed module health status check + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_CONFIG_DATA_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + }while(0); + + if (l_err) + { + // Setup Trace + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err); + + // Callout nvdimm depending on istep call + o_continue &= nvdimmCalloutDimm(i_nvdimm, i_step, l_err); + + if(l_arm_timeout) + { + // Callout and gard the dimm + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_Fatal); + } + } + + // Check all BPM and Cable high, nvdimm low cases + do + { + // If function calling is SAVE, ignore NOT_ENOUGH_ENERGY_FOR_CSAVE + if (i_step != HEALTH_SAVE) + { + // Check MODULE_HEALTH_STATUS1[0] + if ((l_RegInfo.Module_Health_Status1 & NOT_ENOUGH_ENERGY_FOR_CSAVE) == NOT_ENOUGH_ENERGY_FOR_CSAVE) + { + /*@ + *@errortype + *@reasoncode NVDIMM_NOT_ENOUGH_ENERGY_FOR_CSAVE + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * insufficient energy for csave + *@custdesc NVDIMM failed module health status check + */ + l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_NOT_ENOUGH_ENERGY_FOR_CSAVE, + TARGETING::get_huid(i_nvdimm), + 0x0, + 
ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + } + + // Check MODULE_HEALTH_STATUS1[3] + if ((l_RegInfo.Module_Health_Status1 & NO_ES_PRESENT) == NO_ES_PRESENT) + { + /*@ + *@errortype + *@reasoncode NVDIMM_NO_ES_PRESENT + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * no ES active + *@custdesc NVDIMM failed module health status check + */ + l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_NO_ES_PRESENT, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS1[5] + if ((l_RegInfo.Module_Health_Status1 & ES_HARDWARE_FAILURE) == ES_HARDWARE_FAILURE) + { + /*@ + *@errortype + *@reasoncode NVDIMM_ES_HARDWARE_FAILURE + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * ES hardware failure + *@custdesc NVDIMM failed module health status check + */ + l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_ES_HARDWARE_FAILURE, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check MODULE_HEALTH_STATUS1[6] + if ((l_RegInfo.Module_Health_Status1 & ES_HEALTH_ASSESSMENT_ERROR) == ES_HEALTH_ASSESSMENT_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_ES_HEALTH_ASSESSMENT_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * ES error during health assessment + *@custdesc NVDIMM failed module health status check + */ + l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, 
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_ES_HEALTH_ASSESSMENT_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + }while(0); + + if (l_err_t) + { + // Setup Trace + l_err_t->collectTrace( NVDIMM_COMP_NAME ); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err_t); + + // Callout BPM, Cable, and nvdimm + o_continue &= nvdimmBPMCableCallout(i_nvdimm, i_step, l_err_t); + } + + // Check for multiple errors and commit old error + if ((l_err) && (l_err_t)) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // If there was a new error, save off to l_err + if (l_err_t) + { + l_err = l_err_t; + l_err_t = nullptr; + } + + // Check all BPM high, nvdimm low cases + do + { + // Check ERROR_THRESHOLD_STATUS[1] + if ((l_RegInfo.Error_Threshold_Status & ES_LIFETIME_ERROR) == ES_LIFETIME_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_ES_LIFETIME_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * ES lifetime error + *@custdesc NVDIMM failed module health status check + */ + l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_ES_LIFETIME_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + // Check ERROR_THRESHOLD_STATUS[2] + if ((l_RegInfo.Error_Threshold_Status & ES_TEMP_ERROR) == ES_TEMP_ERROR) + { + /*@ + *@errortype + *@reasoncode NVDIMM_ES_TEMP_ERROR + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * ES temporary error + *@custdesc NVDIMM failed module health status check + */ + l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + 
NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_ES_TEMP_ERROR, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + break; + } + + }while(0); + + if (l_err_t) + { + // Setup Trace + l_err_t->collectTrace( NVDIMM_COMP_NAME ); + + // Add reg traces to the error log + NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err_t); + + // Callout BPM and nvdimm + o_continue &= nvdimmBPMCallout(i_nvdimm, i_step, l_err_t); + } + + // Check for multiple errors and commit old error + if ((l_err) && (l_err_t)) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // If there was a new error, save off to l_err + if (l_err_t) + { + l_err = l_err_t; + l_err_t = nullptr; + } + + // Check special pre arm case + if (i_step == HEALTH_PRE_ARM) + { + // Check SET_ES_POLICY_STATUS[4] + if ((l_RegInfo.Set_ES_Policy_Status & ES_POLICY_NOT_SET) == ES_POLICY_NOT_SET) + { + /*@ + *@errortype + *@reasoncode NVDIMM_ES_POLICY_NOT_SET + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK + *@userdata1[0:31] Target Huid + *@userdata2 <UNUSED> + *@devdesc NVDIMM failed module health status check due to + * ES policy not being set during an arm + *@custdesc NVDIMM failed module health status check + */ + l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_MODULE_HEALTH_STATUS_CHECK, + NVDIMM_ES_POLICY_NOT_SET, + TARGETING::get_huid(i_nvdimm), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + o_continue = false; + // Callout dimm but no deconfig and gard + l_err_t->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + } + } + + // Check for multiple errors and commit old error + if ((l_err) && (l_err_t)) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // If there was a new error, save off to l_err + if (l_err_t) + { + l_err = l_err_t; + l_err_t = nullptr; + } + + return l_err; +} + +} // end NVDIMM namespace diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.H b/src/usr/isteps/nvdimm/nvdimmErrorLog.H 
new file mode 100644 index 000000000..dae8e2f2f --- /dev/null +++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.H @@ -0,0 +1,108 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/isteps/nvdimm/nvdimmErrorLog.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2014,2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef NVDIMM_ERROR_LOG_H__ +#define NVDIMM_ERROR_LOG_H__ + +#include <usr/errl/errlentry.H> +#include <targeting/common/commontargeting.H> +#include <targeting/common/util.H> +#include <targeting/common/utilFilter.H> +#include <i2c/eepromif.H> +#include <map> +#include "nvdimmdd.H" +#include "nvdimm.H" + +using namespace TARGETING; +using namespace EEPROM; + +// Trace definition +extern trace_desc_t* g_trac_nvdimm; + +namespace NVDIMM +{ + +/** + * @brief Function to read and save status registers for traces + * + * @param[in] i_nvdimm - nvdimm target with NV controller + * + * @param[out] o_RegInfo - the structure holding the register data + * + */ +void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo); + +/** + * @brief Helper function for standard callout of an NVDIMM + * + * @param[in] i_nvdimm - nvdimm target + * @param[in] i_step - the nvdimm function calling the health check + * @param[in,out] o_err - error log handle that the callouts are added to + * + * @return bool - true to commit log and continue, false to return + * the error log to caller and exit. + */ +bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err); + +/** + * @brief Helper function for BPM/Cable high, NVDIMM low callout + * + * @param[in] i_nvdimm - nvdimm target + * @param[in] i_step - the nvdimm function calling the health check + * @param[in,out] o_err - error log handle that the callouts are added to + * + * @return bool - true to commit log and continue, false to return + * the error log to caller and exit. + */ +bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err); + +/** + * @brief Helper function for BPM high, NVDIMM low callout + * + * @param[in] i_nvdimm - nvdimm target + * @param[in] i_step - the nvdimm function calling the health check + * @param[in,out] o_err - error log handle that the callouts are added to + * + * @return bool - true to commit log and continue, false to return + * the error log to caller and exit. 
+ */ +bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err); + +/** + * @brief Function checking the Health Status Registers for an nvdimm + * + * @param[in] i_nvdimm - nvdimm target + * @param[in] i_step - the nvdimm step calling the check + * @param[in,out] o_continue - bool to signal a return to caller fail + * + * @return errlHndl_t - Null if successful, otherwise a pointer to + * the error log. + */ +errlHndl_t nvdimmHealthStatusCheck(Target *i_nvdimm, uint8_t i_step, bool& o_continue); + +} //End NVDIMM namespace + + +#endif // NVDIMM_ERROR_LOG_H__ diff --git a/src/usr/isteps/nvdimm/nvdimm_update.C b/src/usr/isteps/nvdimm/nvdimm_update.C index 2e1f61c8c..6075a660f 100644 --- a/src/usr/isteps/nvdimm/nvdimm_update.C +++ b/src/usr/isteps/nvdimm/nvdimm_update.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018,2019 */ +/* Contributors Listed Below - COPYRIGHT 2018,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -26,6 +26,7 @@ #include "nvdimm.H" #include <isteps/nvdimm/nvdimm.H> #include <isteps/nvdimm/nvdimmreasoncodes.H> +#include "bpm_update.H" #include <initservice/istepdispatcherif.H> // sendProgressCode #include <util/utilmclmgr.H> // secure LID manager @@ -33,6 +34,7 @@ #include <devicefw/userif.H> #include <vpd/spdenums.H> #include <sys/time.h> +#include <vector> // Unique tracing for nvdimm update process const char NVDIMM_UPD[] = "NVDIMM_UPD"; @@ -41,7 +43,7 @@ TRAC_INIT(&g_trac_nvdimm_upd, NVDIMM_UPD, 2*KILOBYTE); // Easy macro replace for unit testing -// #define TRACUCOMP(args...) TRACFCOMP(args) +//#define TRACUCOMP(args...) TRACFCOMP(args) #define TRACUCOMP(args...) 
namespace NVDIMM @@ -144,8 +146,10 @@ typedef union { } nvdimm_cmd_status0_t; // A code update block is composed of this many bytes -const uint8_t BYTES_PER_BLOCK = 32; +constexpr uint8_t BYTES_PER_BLOCK = 32; +// Maximum allowed region write retries +constexpr uint8_t MAX_REGION_WRITE_RETRY_ATTEMPTS = 3; /////////////////////////////////////////////////////////////////////////////// // NVDIMM LID Image @@ -182,6 +186,7 @@ uint16_t NvdimmLidImage::getVersion() return o_version; } + const uint8_t * NvdimmLidImage::getHeaderAndSmartSignature(uint16_t & o_size) { o_size = 0; @@ -264,7 +269,10 @@ NvdimmInstalledImage::NvdimmInstalledImage(TARGETING::Target * i_nvDimm) : iv_dimm(i_nvDimm), iv_version(INVALID_VERSION), iv_manufacturer_id(INVALID_ID), iv_product_id(INVALID_ID), iv_timeout(INVALID_TIMEOUT), - iv_max_blocks_per_region(INVALID_REGION_BLOCK_SIZE) + iv_max_blocks_per_region(INVALID_REGION_BLOCK_SIZE), + iv_fw_update_mode_enabled(false), + iv_region_write_retries(0), + iv_blockSizeSupported(INVALID_BLOCK_SIZE) { // initialize to invalid values } @@ -350,12 +358,50 @@ errlHndl_t NvdimmInstalledImage::getVersion(uint16_t & o_version, return l_err; } +errlHndl_t NvdimmInstalledImage::getBlockWriteSizeSupported(uint64_t & o_blockSize) +{ + errlHndl_t l_err = nullptr; + + do { + if (iv_blockSizeSupported == INVALID_BLOCK_SIZE) + { + uint16_t version = INVALID_VERSION; + l_err = getVersion(version, 0); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"getBlockWriteSizeSupported: " + "Failed to get version for 0x%.8X NVDIMM", + TARGETING::get_huid(iv_dimm)); + break; + } + + // The block write is more prone to random system interrupt + // which does something funny to the i2c bus. 
+ // v3.A has the timeout increased to mitigate that + if (version >= 0x3A00) + { + // version supports 32-byte block size + iv_blockSizeSupported = 32; + } + else + { + // default to word size max write + iv_blockSizeSupported = sizeof(uint16_t); + } + TRACFCOMP( g_trac_nvdimm_upd, ERR_MRK"getBlockWriteSizeSupported: " + "block size %d supported for 0x%.8X NVDIMM (version 0x%04X)", + iv_blockSizeSupported, TARGETING::get_huid(iv_dimm), + version ); + } + } while (0); + o_blockSize = iv_blockSizeSupported; + return l_err; +} errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) { errlHndl_t l_err = nullptr; - // need to always disable this after it gets enabled - bool l_fw_update_mode_enabled = false; + do { INITSERVICE::sendProgressCode(); //////////////////////////////////////////////////////////////////////// @@ -381,7 +427,7 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) TRACFCOMP(g_trac_nvdimm_upd,ERR_MRK"updateImage: " "NV controller is busy (0x%08X) for NVDIMM 0x%.8X", l_status.whole, TARGETING::get_huid(iv_dimm)); - /* + /*@ *@errortype *@moduleid UPDATE_IMAGE *@reasoncode NVDIMM_OPERATION_IN_PROGRESS @@ -398,11 +444,14 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) l_status.whole, ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace( NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); break; } @@ -427,8 +476,6 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) TARGETING::get_huid(iv_dimm)); break; } - // Set this flag so we will disable the update mode on error - l_fw_update_mode_enabled = true; // 5. 
Clear the Firmware Operation status TRACUCOMP(g_trac_nvdimm_upd, "updateImage: step 5"); @@ -549,7 +596,7 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) "NVDIMM 0x%.8X: data checksums mismatch (calc host: 0x%X " "and nv: 0x%X) for first part (header + SMART signature)", TARGETING::get_huid(iv_dimm), hostCksm, nvCksm); - /* + /*@ *@errortype *@moduleid UPDATE_IMAGE *@reasoncode NVDIMM_CHECKSUM_ERROR @@ -571,6 +618,7 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) 0x0000), ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace( NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); // maybe some data was altered on the NV controller l_err->addPartCallout( iv_dimm, @@ -579,6 +627,8 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) // possible code issue l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); break; } @@ -641,7 +691,6 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) // 12. 
Disable firmware update mode TRACUCOMP(g_trac_nvdimm_upd, "updateImage: step 12"); - l_fw_update_mode_enabled = false; // don't retry the disable on error l_err = changeFwUpdateMode(FW_UPDATE_MODE_DISABLED); if (l_err) { @@ -668,7 +717,7 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) // Reset controller to activate new firmware TRACUCOMP(g_trac_nvdimm_upd, "updateImage: resetController"); - l_err = resetController(); + l_err = nvdimmResetController(iv_dimm); if (l_err) { TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK "updateImage: " @@ -701,7 +750,7 @@ errlHndl_t NvdimmInstalledImage::updateImage(NvdimmLidImage * i_lidImage) } while (0); // If update operation is aborted, we need to disable update mode - if (l_fw_update_mode_enabled) + if (iv_fw_update_mode_enabled) { TRACFCOMP(g_trac_nvdimm_upd, "updateImage: update was aborted, so disable FW_UPDATE_MODE"); errlHndl_t l_err2 = changeFwUpdateMode(FW_UPDATE_MODE_DISABLED); @@ -765,7 +814,7 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage) } if (fw_img_total_regions == 0) { - /* + /*@ *@errortype *@moduleid UPDATE_IMAGE_DATA *@reasoncode NVDIMM_ZERO_TOTAL_REGIONS @@ -787,6 +836,8 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage) 0x00000000), ERRORLOG::ErrlEntry::ADD_SW_CALLOUT ); l_err->collectTrace( NVDIMM_COMP_NAME, 256 ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); break; } @@ -812,11 +863,15 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage) break; } + uint8_t l_region_write_retries = 0; // local region write retry count uint16_t region = 0; while (region < fw_img_total_regions) { - if (region % 10 == 0) + if (region % 100 == 0) { + TRACFCOMP(g_trac_nvdimm_upd, + "updateImage: progress code for sending region %d", + region); INITSERVICE::sendProgressCode(); } TRACUCOMP(g_trac_nvdimm_upd, "updateImage: step 10.a - region 0x%04X", @@ -914,15 +969,17 @@ errlHndl_t 
NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage) if (hostCksm != nvCksm) { TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"updateImageData: " - "Region %d of NVDIMM 0x%.8X: data checksums mismatch " + "Region %d out of %d on NVDIMM 0x%.8X: data checksums mismatch " "(calc host: 0x%X and nv: 0x%X)", - region, TARGETING::get_huid(iv_dimm), hostCksm, nvCksm); + region, fw_img_total_regions, + TARGETING::get_huid(iv_dimm), hostCksm, nvCksm); - /* + /*@ *@errortype *@moduleid UPDATE_IMAGE_DATA *@reasoncode NVDIMM_CHECKSUM_ERROR - *@userdata1 NVDIMM Target Huid + *@userdata1[0:31] NVDIMM Target Huid + *@userdata1[32:63] Retry count for this region *@userdata2[0:15] Host checksum calculated *@userdata2[16:31] NV checksum returned *@userdata2[32:47] size of data for checksum @@ -934,18 +991,44 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage) ERRORLOG::ERRL_SEV_PREDICTIVE, UPDATE_IMAGE_DATA, NVDIMM_CHECKSUM_ERROR, - TARGETING::get_huid(iv_dimm), + TWO_UINT32_TO_UINT64( + TARGETING::get_huid(iv_dimm), + l_region_write_retries), FOUR_UINT16_TO_UINT64( hostCksm, nvCksm, region, data_len), ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); - l_err->collectTrace( NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); + + // Under the total retry attempts per region? 
+ if (l_region_write_retries < MAX_REGION_WRITE_RETRY_ATTEMPTS) + { + TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"updateImageData: " + "Region %d on NVDIMM 0x%.8X failed, retry %d", + region, TARGETING::get_huid(iv_dimm),l_region_write_retries); + l_err->collectTrace(NVDIMM_UPD, 512); + + // Change PREDICTIVE to INFORMATIONAL as this might be recoverable + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + + // Commit this log and retry region write + ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID); + l_err = nullptr; + + // Update total for this region + l_region_write_retries++; + // update total retries for entire NVDIMM + iv_region_write_retries++; + continue; + } break; } @@ -989,7 +1072,7 @@ errlHndl_t NvdimmInstalledImage::changeFwUpdateMode(fw_update_mode i_mode) ((i_mode == FW_UPDATE_MODE_DISABLED) && (opStatus.fw_ops_update_mode == 0))) ) { - /* + /*@ *@errortype *@moduleid CHANGE_FW_UPDATE_MODE *@reasoncode NVDIMM_UPDATE_MODE_UNCHANGED @@ -1009,11 +1092,25 @@ errlHndl_t NvdimmInstalledImage::changeFwUpdateMode(fw_update_mode i_mode) 0x00, 0x00), ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace( NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); + } + else + { + if (opStatus.fw_ops_update_mode == 1) + { + iv_fw_update_mode_enabled = true; + } + else + { + iv_fw_update_mode_enabled = false; + } } } } @@ -1025,8 +1122,9 @@ errlHndl_t NvdimmInstalledImage::waitFwOpsBlockReceived() { errlHndl_t l_err = nullptr; - // retry for a total of 100ms - uint32_t timeout_ms_val = 100; + // retry for a total of 500ms + const uint32_t MAX_WAIT_FOR_OPS_BLOCK_RECEIVED = 500; + uint32_t timeout_ms_val = MAX_WAIT_FOR_OPS_BLOCK_RECEIVED; bool blockReceived = false; fw_ops_status_t opStatus; @@ -1042,6 +1140,7 @@ errlHndl_t 
NvdimmInstalledImage::waitFwOpsBlockReceived() TARGETING::get_huid(iv_dimm), timeout_ms_val); break; } + if (!opStatus.fw_ops_block_received) { // wait 1 millisecond between checking status @@ -1066,7 +1165,13 @@ errlHndl_t NvdimmInstalledImage::waitFwOpsBlockReceived() if (!blockReceived && !l_err) { - /* + TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"waitFwOpsBlockReceived: " + "NVDIMM 0x%.8X FIRMWARE_OPS_STATUS (timeout: %d ms) " + "-- Last status: 0x%02X", + TARGETING::get_huid(iv_dimm), MAX_WAIT_FOR_OPS_BLOCK_RECEIVED, + opStatus.whole); + + /*@ *@errortype *@moduleid WAIT_FW_OPS_BLOCK_RECEIVED *@reasoncode NVDIMM_BLOCK_NOT_RECEIVED @@ -1086,16 +1191,19 @@ errlHndl_t NvdimmInstalledImage::waitFwOpsBlockReceived() ( TWO_UINT8_TO_UINT16( 0x00, opStatus.whole), - 100, + MAX_WAIT_FOR_OPS_BLOCK_RECEIVED, timeout_ms_val ), ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 512 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); } return l_err; @@ -1145,7 +1253,7 @@ errlHndl_t NvdimmInstalledImage::waitFwOpsComplete() if (!opsComplete && !l_err) { - /* + /*@ *@errortype *@moduleid WAIT_FW_OPS_COMPLETE *@reasoncode NVDIMM_FW_OPS_IN_PROGRESS_TIMEOUT @@ -1169,11 +1277,14 @@ errlHndl_t NvdimmInstalledImage::waitFwOpsComplete() ), ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); } } return l_err; @@ -1234,6 +1345,103 @@ errlHndl_t NvdimmInstalledImage::clearFwOpsStatus() "NVDIMM 0x%.8X clear FIRMWARE_OPS_STATUS 
register failed", TARGETING::get_huid(iv_dimm)); } + else + { + // Verify expected bits cleared + + // Setup expected cleared status byte + fw_ops_status_t l_cleared_ops_status; + l_cleared_ops_status.whole = 0x00; + if (iv_fw_update_mode_enabled) + { + // set BIT 2 -- this should not be cleared by the command + l_cleared_ops_status.fw_ops_update_mode = 1; + } + + // Set some timeout so this doesn't cause endless loop + uint16_t timeout_val = INVALID_TIMEOUT; + l_err = getFwOpsTimeout(timeout_val); + // Note: potential error will just exit the while loop and be returned + + // convert seconds to ms value + // double the timeout to ensure enough time has elapsed for the clear + // note: doubling here instead of just doubling timeout_val since that + // variable is only a bit16 vs bit32 + uint32_t timeout_ms_val = timeout_val * 1000 * 2; + + fw_ops_status_t l_ops_status; + + while (!l_err) + { + l_err = nvdimmReadReg(iv_dimm, FIRMWARE_OPS_STATUS, l_ops_status.whole); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"clearFwOpsStatus: " + "NVDIMM 0x%.8X read FIRMWARE_OPS_STATUS register failed " + " (0x%02X)", + TARGETING::get_huid(iv_dimm), l_ops_status.whole); + break; + } + + // Exit if expected cleared status is found + if (l_ops_status.whole == l_cleared_ops_status.whole) + { + break; + } + + // wait 1 millisecond between checking status + if (timeout_ms_val > 0) + { + timeout_ms_val -= 1; + nanosleep(0, NS_PER_MSEC); + } + else + { + // timeout hit + TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"clearFwOpsStatus: " + "NVDIMM 0x%.8X FIRMWARE_OPS_STATUS register reads 0x%02X " + "instead of cleared value of 0x%02X after %lld seconds", + TARGETING::get_huid(iv_dimm), l_ops_status.whole, + l_cleared_ops_status.whole, timeout_val*2); + + /*@ + *@errortype + *@moduleid CLEAR_FW_OPS_STATUS + *@reasoncode NVDIMM_CLEAR_FW_OPS_STATUS_TIMEOUT + *@userdata1 NVDIMM Target Huid + *@userdata2[0:7] Last FIRMWARE_OPS_STATUS read + *@userdata2[8:15] Expected cleared status + 
*@userdata2[16:31] Reserved + *@userdata2[32:63] Timeout (seconds) + *@devdesc FIRMWARE_OPS_STATUS not cleared + *@custdesc NVDIMM not updated + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + CLEAR_FW_OPS_STATUS, + NVDIMM_CLEAR_FW_OPS_STATUS_TIMEOUT, + TARGETING::get_huid(iv_dimm), + TWO_UINT16_ONE_UINT32_TO_UINT64 + ( + TWO_UINT8_TO_UINT16( + l_ops_status.whole, + l_cleared_ops_status.whole), + 0x0000, + timeout_val * 2 + ), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME, 256); + l_err->addPartCallout( iv_dimm, + HWAS::NV_CONTROLLER_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH ); + l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, + HWAS::SRCI_PRIORITY_LOW ); + + break; + } + } // end of while (!l_err) loop + } // end of Verify expected bits cleared + return l_err; } @@ -1317,7 +1525,7 @@ errlHndl_t NvdimmInstalledImage::byteRegionBlockTransfer(const uint8_t * i_data, } if (blocks_per_region > max_blocks_per_region) { - /* + /*@ *@errortype *@moduleid BYTE_REGION_BLOCK_TRANSFER *@reasoncode NVDIMM_DATA_SIZE_TOO_LARGE @@ -1352,7 +1560,7 @@ errlHndl_t NvdimmInstalledImage::byteRegionBlockTransfer(const uint8_t * i_data, if (i_data_size > (BYTES_PER_BLOCK*blocks_per_region)) { - /* + /*@ *@errortype *@moduleid BYTE_REGION_BLOCK_TRANSFER *@reasoncode NVDIMM_DATA_SIZE_INVALID @@ -1421,15 +1629,29 @@ errlHndl_t NvdimmInstalledImage::byteRegionBlockTransfer(const uint8_t * i_data, TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"byteRegionBlockTransfer: " "Unable to open page for BLOCK %d transfer of NVDIMM " "0x%.8X", blockNum, TARGETING::get_huid(iv_dimm)); + break; } size_t l_numBytes = BYTES_PER_BLOCK; uint8_t l_reg_addr = ADDRESS(TYPED_BLOCK_DATA_BYTE0); + + // Grab whether word or 32-byte block write is supported + uint64_t blockSizeSupported = INVALID_BLOCK_SIZE; + l_err = getBlockWriteSizeSupported(blockSizeSupported); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"byteRegionBlockTransfer: " + "Unable to grab 
maximum block write size for NVDIMM 0x%.8X", + TARGETING::get_huid(iv_dimm)); + break; + } + l_err = DeviceFW::deviceOp( DeviceFW::WRITE, iv_dimm, pCurrentBlockData, l_numBytes, - DEVICE_NVDIMM_ADDRESS(l_reg_addr) ); + DEVICE_NVDIMM_RAW_ADDRESS_WITH_BLOCKSIZE(l_reg_addr, blockSizeSupported) + ); if (l_err) { TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"byteRegionBlockTransfer: " @@ -1437,8 +1659,6 @@ errlHndl_t NvdimmInstalledImage::byteRegionBlockTransfer(const uint8_t * i_data, blockNum, l_reg_addr, TARGETING::get_huid(iv_dimm)); break; } - // increment to next block - pCurrentBlockData += BYTES_PER_BLOCK; // After a block has been transferred, verify that the 32-byte block // was received by polling FIRMWARE_OPS_STATUS offset for @@ -1449,10 +1669,39 @@ errlHndl_t NvdimmInstalledImage::byteRegionBlockTransfer(const uint8_t * i_data, TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"byteRegionBlockTransfer: " "Block %d read of FIRMWARE_OPS_STATUS failed on NVDIMM " " 0x%.8X", blockNum, TARGETING::get_huid(iv_dimm)); + + size_t tmpNumBytes = l_numBytes; + uint8_t tmpBuffer[tmpNumBytes]; + errlHndl_t l_err2 = DeviceFW::deviceOp( DeviceFW::READ, + iv_dimm, + tmpBuffer, + tmpNumBytes, + DEVICE_NVDIMM_ADDRESS(l_reg_addr) ); + if (l_err2) + { + TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"byteRegionBlockTransfer: " + "Block %d read from 0x%02X failed on NVDIMM 0x%.8X", + blockNum, l_reg_addr, TARGETING::get_huid(iv_dimm)); + l_err2->plid(l_err->plid()); + l_err2->collectTrace(NVDIMM_COMP_NAME); + l_err2->collectTrace(NVDIMM_UPD); + errlCommit(l_err2, NVDIMM_COMP_ID); + break; + } + else + { + TRACFBIN(g_trac_nvdimm_upd, "byteRegionBlockTransfer: Wrote block", pCurrentBlockData, l_numBytes); + TRACFBIN(g_trac_nvdimm_upd, "byteRegionBlockTransfer: Read-back block", tmpBuffer, l_numBytes); + } + break; } + // block of data successfully sent to NV controller TRACUCOMP(g_trac_nvdimm_upd,"byteRegionBlockTransfer: block 0x%02X successfully sent to NV controller", blockNum); + + // increment to next block 
+ pCurrentBlockData += BYTES_PER_BLOCK; blockNum++; } @@ -1516,7 +1765,7 @@ errlHndl_t NvdimmInstalledImage::validateFwHeader() l_err = isFwOpsSuccess(opsSuccessful); if (!l_err && !opsSuccessful) { - /* + /*@ *@errortype *@moduleid VALIDATE_FW_HEADER *@reasoncode NVDIMM_FW_OPS_NOT_SUCCESSFUL @@ -1533,11 +1782,14 @@ errlHndl_t NvdimmInstalledImage::validateFwHeader() opsCmd.whole, ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); } } } @@ -1565,7 +1817,7 @@ errlHndl_t NvdimmInstalledImage::commitFwRegion() l_err = isFwOpsSuccess(opsSuccessful); if (!l_err && !opsSuccessful) { - /* + /*@ *@errortype *@moduleid COMMIT_FW_REGION *@reasoncode NVDIMM_FW_OPS_NOT_SUCCESSFUL @@ -1582,11 +1834,14 @@ errlHndl_t NvdimmInstalledImage::commitFwRegion() opsCmd.whole, ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); } } } @@ -1615,7 +1870,7 @@ errlHndl_t NvdimmInstalledImage::clearFwDataBlock() l_err = isFwOpsSuccess(ops_success); if (!l_err && !ops_success) { - /* + /*@ *@errortype *@moduleid CLEAR_FW_DATA_BLOCK *@reasoncode NVDIMM_FW_OPS_NOT_SUCCESSFUL @@ -1632,11 +1887,14 @@ errlHndl_t NvdimmInstalledImage::clearFwDataBlock() opsCmd.whole, ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); 
l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); } } } @@ -1664,7 +1922,7 @@ errlHndl_t NvdimmInstalledImage::validateFwImage() // create an error if operation not successful if (!l_err && !opsSuccessful) { - /* + /*@ *@errortype *@moduleid VALIDATE_FW_IMAGE *@reasoncode NVDIMM_FW_OPS_NOT_SUCCESSFUL @@ -1681,12 +1939,14 @@ errlHndl_t NvdimmInstalledImage::validateFwImage() opsCmd.whole, ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); + nvdimmAddVendorLog(iv_dimm, l_err); l_err->addPartCallout( iv_dimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); - + nvdimmAddPage4Regs(iv_dimm,l_err); + nvdimmAddUpdateRegs(iv_dimm,l_err); } } } @@ -1694,41 +1954,6 @@ errlHndl_t NvdimmInstalledImage::validateFwImage() return l_err; } -errlHndl_t NvdimmInstalledImage::resetController() -{ - errlHndl_t l_err = nullptr; - - // If bit 0 is set, the module shall start a Reset Controller operation - l_err = nvdimmWriteReg(iv_dimm, NVDIMM_MGT_CMD0, 0x01); - if (l_err) - { - TRACFCOMP(g_trac_nvdimm_upd,ERR_MRK"resetController: NVDIMM 0x%.8X " - "write of 0x01 to NVDIMM_MGT_CMD0 register failed", - TARGETING::get_huid(iv_dimm)); - } - else - { - TRACUCOMP(g_trac_nvdimm_upd,"resetController: waiting 5 seconds after controller 0x%.8X reset", - TARGETING::get_huid(iv_dimm)); - - // sleep 5 seconds to allow for i2c controller to come back online - nanosleep(5,0); - - TRACUCOMP(g_trac_nvdimm_upd,"resetController: now check if NV controller is ready again", - TARGETING::get_huid(iv_dimm)); - - // Now wait until NV controller is ready again after reset - l_err = nvdimmReady(iv_dimm); - if (l_err) - { - TRACFCOMP(g_trac_nvdimm_upd,ERR_MRK"resetController: NV controller for " - "NVDIMM 0x%.8X is not reporting as ready after reset", - TARGETING::get_huid(iv_dimm)); - } - } 
- return l_err; -} - uint16_t NvdimmInstalledImage::crc16(const uint8_t * i_data, int i_data_size) { // From JEDEC JESD245B.01 document @@ -1769,6 +1994,9 @@ bool NvdimmsUpdate::runUpdateUsingLid(NvdimmLidImage * i_lidImage, errlHndl_t l_err = nullptr; for (auto pInstalledImage : i_list) { + TARGETING::Target * l_nvdimm = pInstalledImage->getNvdimmTarget(); + uint64_t l_nvdimm_huid = TARGETING::get_huid(l_nvdimm); + INITSERVICE::sendProgressCode(); bool updateNeeded = false; l_err = isUpdateNeeded(updateNeeded, i_lidImage, pInstalledImage); @@ -1785,14 +2013,69 @@ bool NvdimmsUpdate::runUpdateUsingLid(NvdimmLidImage * i_lidImage, } else if (updateNeeded) { + // shared trace variables + uint32_t l_installed_type = INVALID_TYPE; + l_err = pInstalledImage->getType(l_installed_type); + if (l_err) + { + // Continue updating other dimms + TRACFCOMP(g_trac_nvdimm_upd, + ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - " + "Unable to get nvdimm[0x%.8X] installed image type. " + "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid, + ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err)); + commitPredictiveNvdimmError(l_err); + l_err = nullptr; + continue; + } + + uint16_t l_oldVersion = INVALID_VERSION; + l_err = pInstalledImage->getVersion(l_oldVersion); + if (l_err) + { + // This shouldn't happen as getVersion should return a + // cached version + TRACFCOMP(g_trac_nvdimm_upd, + ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - " + "Failed to find current NVDIMM level of %.8X. 
" + "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid, + ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err)); + commitPredictiveNvdimmError(l_err); + l_err = nullptr; + o_no_error_found = false; + continue; + } + // perform update for this DIMM with the current LID image TRACFCOMP(g_trac_nvdimm_upd, "NvdimmsUpdate::runUpdateUsingLid() - " - "now update nvdimm[0x%.8X]", - TARGETING::get_huid(pInstalledImage->getNvdimmTarget())); + "now update nvdimm[0x%.8X]", l_nvdimm_huid); TRACFCOMP(g_trac_nvdimm_upd,"Updating with flash size: 0x%08X", i_lidImage->getFlashImageSize()); + /*@ + *@errortype INFORMATIONAL + *@reasoncode NVDIMM_START_UPDATE + *@moduleid NVDIMM_RUN_UPDATE_USING_LID + *@userdata1 NVDIMM Target Huid + *@userdata2[0:15] Old level (current) + *@userdata2[16:31] Update image level (new) + *@userdata2[32:63] Installed type (manufacturer and product) + *@devdesc Start of the NVDIMM update of this controller + *@custdesc NVDIMM update started + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_INFORMATIONAL, + NVDIMM_RUN_UPDATE_USING_LID, + NVDIMM_START_UPDATE, + l_nvdimm_huid, + TWO_UINT16_ONE_UINT32_TO_UINT64( + l_oldVersion, i_lidImage->getVersion(), + l_installed_type), + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT); + l_err->collectTrace(NVDIMM_UPD, 256); + ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID); + l_err = nullptr; + l_err = pInstalledImage->updateImage(i_lidImage); if (l_err) { @@ -1803,14 +2086,150 @@ bool NvdimmsUpdate::runUpdateUsingLid(NvdimmLidImage * i_lidImage, TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - " "NVDIMM 0x%.8X NV controller update failed. 
" - "RC=0x%X, PLID=0x%.8X", - TARGETING::get_huid(pInstalledImage->getNvdimmTarget()), + "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid, ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err)); commitPredictiveNvdimmError(l_err); l_err = nullptr; o_no_error_found = false; + continue; + } + else + { + // successfully updated this NVDIMM + + // Note: call for version should just return a saved value + uint16_t curVersion = INVALID_VERSION; + l_err = pInstalledImage->getVersion(curVersion); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm_upd, + ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - " + "Failed to find current NVDIMM level of %.8X after " + "successful update. RC=0x%X, PLID=0x%.8X", + l_nvdimm_huid, + ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err)); + commitPredictiveNvdimmError(l_err); + l_err = nullptr; + } + + /*@ + *@errortype INFORMATIONAL + *@reasoncode NVDIMM_UPDATE_COMPLETE + *@moduleid NVDIMM_RUN_UPDATE_USING_LID + *@userdata1[0:31] NVDIMM Target Huid + *@userdata1[32:63] Total region write retries + *@userdata2[0:15] Previous level + *@userdata2[16:31] Current updated level + *@userdata2[32:63] Installed type (manufacturer and product) + *@devdesc Successful update of NVDIMM code + *@custdesc NVDIMM was successfully updated + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_INFORMATIONAL, + NVDIMM_RUN_UPDATE_USING_LID, + NVDIMM_UPDATE_COMPLETE, + TWO_UINT32_TO_UINT64( + l_nvdimm_huid, + pInstalledImage->getRegionWriteRetries()), + TWO_UINT16_ONE_UINT32_TO_UINT64( + l_oldVersion, curVersion, + l_installed_type), + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_UPD, 512); + ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID); } } // end of updateNeeded + + ///////////////////////////////////////////////////////////////// + // Should not exit the nvdimm update stage until each nvdimm + // is running at the lid's code level + // (or a predictive error was logged for that nvdimm) + ///////////////////////////////////////////////////////////////// + + 
// Check NVDIMM is at the latest level and it is running from slot 1 + uint16_t l_curVersion = INVALID_VERSION; + l_err = pInstalledImage->getVersion(l_curVersion, true); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm_upd, + ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - " + "Failed to find current level of NVDIMM %.8X. " + "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid, + ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err)); + commitPredictiveNvdimmError(l_err); + l_err = nullptr; + o_no_error_found = false; + continue; + } + uint8_t l_slot_running = 0; + l_err = nvdimmGetRunningSlot(l_nvdimm, l_slot_running); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm_upd, + ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - " + "Failed to find running slot of NVDIMM %.8X. " + "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid, + ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err)); + commitPredictiveNvdimmError(l_err); + l_err = nullptr; + o_no_error_found = false; + continue; + } + + if ((l_slot_running == 0) || (l_curVersion != i_lidImage->getVersion())) + { + // Not running latest code on this NVDIMM + TRACFCOMP(g_trac_nvdimm_upd, + ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - " + "NVDIMM %.8X running from slot %d with code level " + "0x%04X (lid level: 0x%04X)", + l_nvdimm_huid, l_slot_running, l_curVersion, + i_lidImage->getVersion()); + /*@ + *@errortype + *@reasoncode NVDIMM_NOT_RUNNING_LATEST_LEVEL + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_RUN_UPDATE_USING_LID + *@userdata1 NVDIMM Target Huid + *@userdata2[0:15] NVDIMM slot + *@userdata2[16:31] slot1 version + *@userdata2[32:47] latest version from lid + *@devdesc Encountered error after update while checking + * if NVDIMM is running latest code level + *@custdesc NVDIMM not running latest firmware level + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_RUN_UPDATE_USING_LID, + NVDIMM_NOT_RUNNING_LATEST_LEVEL, + l_nvdimm_huid, + FOUR_UINT16_TO_UINT64( + l_slot_running, + l_curVersion, + i_lidImage->getVersion(), + 
0x0000), + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Add callout of nvdimm with no deconfig/gard + l_err->addHwCallout( l_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + // Maybe vendor log will tell why it isn't running latest code level + nvdimmAddVendorLog(l_nvdimm, l_err); + commitPredictiveNvdimmError(l_err); + l_err = nullptr; + o_no_error_found = false; + } + else + { + TRACFCOMP(g_trac_nvdimm_upd, + "NvdimmsUpdate::runUpdateUsingLid() - " + "NVDIMM %.8X running from slot %d with latest level 0x%04X", + l_nvdimm_huid, l_slot_running, l_curVersion); + } } return o_no_error_found; } @@ -1826,12 +2245,15 @@ bool NvdimmsUpdate::runUpdate(void) // List of each installed NVDIMM type std::vector<NvdimmInstalledImage*> v_NVDIMM_16GB_list; std::vector<NvdimmInstalledImage*> v_NVDIMM_32GB_list; + BPM::bpmList_t NVDIMM_BPM_16GB_list; + BPM::bpmList_t NVDIMM_BPM_32GB_list; // Build up installed NVDIMM image lists for (auto l_nvdimm : iv_nvdimmList) { NvdimmInstalledImage * l_installed_image = new NvdimmInstalledImage(l_nvdimm); + l_err = l_installed_image->getType(l_installed_type); if (l_err) { @@ -1843,6 +2265,10 @@ bool NvdimmsUpdate::runUpdate(void) ERRL_GETPLID_SAFE(l_err)); commitPredictiveNvdimmError(l_err); o_no_error_found = false; + + // Delete the unused NvdimmInstalledImage pointer + delete l_installed_image; + continue; } @@ -1852,6 +2278,10 @@ bool NvdimmsUpdate::runUpdate(void) "0x%.8X NVDIMM is SMART_NVDIMM_16GB_TYPE", get_huid(l_nvdimm)); v_NVDIMM_16GB_list.push_back(l_installed_image); + + BPM::Bpm l_16gbBpm(l_nvdimm); + NVDIMM_BPM_16GB_list.push_back(l_16gbBpm); + } else if (l_installed_type == SMART_NVDIMM_32GB_TYPE) { @@ -1859,6 +2289,9 @@ bool NvdimmsUpdate::runUpdate(void) "0x%.8X NVDIMM is SMART_NVDIMM_32GB_TYPE", get_huid(l_nvdimm)); v_NVDIMM_32GB_list.push_back(l_installed_image); + + BPM::Bpm l_32gbBpm(l_nvdimm); + NVDIMM_BPM_32GB_list.push_back(l_32gbBpm); } else 
{ @@ -1866,7 +2299,7 @@ bool NvdimmsUpdate::runUpdate(void) TRACFCOMP(g_trac_nvdimm_upd, "NvdimmsUpdate::runUpdate() - unknown " "nvdimm[%X] installed type 0x%04X, skipping update", TARGETING::get_huid(l_nvdimm), l_installed_type); - /* + /*@ *@errortype *@reasoncode NVDIMM_UNSUPPORTED_NVDIMM_TYPE *@moduleid NVDIMM_RUN_UPDATE @@ -1889,36 +2322,34 @@ bool NvdimmsUpdate::runUpdate(void) ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); l_err->collectTrace(NVDIMM_UPD, 256); + nvdimmAddVendorLog(l_nvdimm, l_err); l_err->addPartCallout( l_nvdimm, HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); + nvdimmAddPage4Regs(l_nvdimm,l_err); + nvdimmAddUpdateRegs(l_nvdimm,l_err); ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID); + + // Delete the unused NvdimmInstalledImage object + delete l_installed_image; + continue; } } do { - // First check that updatable NVDIMMs exist on the system - if ((v_NVDIMM_16GB_list.size() == 0) && - (v_NVDIMM_32GB_list.size() == 0)) - { - TRACFCOMP(g_trac_nvdimm_upd, "NvdimmsUpdate::runUpdate() - " - "No updatable NVDIMMs present on the system"); - break; - } - - ///////////////////////// - // @todo: remove this check when SMART provides updated 32GB image - // The current 32GB image will cause the future updating to fail - if (v_NVDIMM_16GB_list.size() == 0) + // First check that updatable NVDIMMs or BPMs exist on the system + if ( (v_NVDIMM_16GB_list.size() == 0) + && (v_NVDIMM_32GB_list.size() == 0) + && (NVDIMM_BPM_16GB_list.size() == 0) + && (NVDIMM_BPM_32GB_list.size() == 0)) { TRACFCOMP(g_trac_nvdimm_upd, "NvdimmsUpdate::runUpdate() - " - "Only 16GB NVDIMM type is supported right now for update"); + "No updatable NVDIMMs or BPMs present on the system"); break; } - ///////////////////////// if (INITSERVICE::spBaseServicesEnabled()) { @@ -1935,7 +2366,15 @@ bool NvdimmsUpdate::runUpdate(void) break; } - for(const auto& lid : 
info.lidIds) + // Both the config and firmware images are needed to perform an + // update on a BPM. So, get pointers to each in the CompInfo + // struct's vector of LID IDs. + MCL::LidInfo * bpm_16gb_fw = nullptr; + MCL::LidInfo * bpm_16gb_config = nullptr; + MCL::LidInfo * bpm_32gb_fw = nullptr; + MCL::LidInfo * bpm_32gb_config = nullptr; + + for(auto& lid : info.lidIds) { TRACFCOMP(g_trac_nvdimm,"LID ID=0x%08X, size=%d, vAddr=%p", lid.id, lid.size, lid.vAddr); @@ -1966,6 +2405,22 @@ bool NvdimmsUpdate::runUpdate(void) v_NVDIMM_32GB_list); } } + else if (lid.id == NVDIMM_32GB_BPM_FW_LIDID) + { + bpm_32gb_fw = &lid; + } + else if (lid.id == NVDIMM_32GB_BPM_CONFIG_LIDID) + { + bpm_32gb_config = &lid; + } + else if (lid.id == NVDIMM_16GB_BPM_FW_LIDID) + { + bpm_16gb_fw = &lid; + } + else if (lid.id == NVDIMM_16GB_BPM_CONFIG_LIDID) + { + bpm_16gb_config = &lid; + } else if (lid.id != NVDIMM_SIGNATURE_LIDID) { TRACFCOMP(g_trac_nvdimm, "NvdimmsUpdate::runUpdate() - " @@ -1975,6 +2430,26 @@ bool NvdimmsUpdate::runUpdate(void) } } + // Run BPM updates on NVDIMMs + BPM::BpmFirmwareLidImage fwImage_16gb(bpm_16gb_fw->vAddr, + bpm_16gb_fw->size); + + BPM::BpmFirmwareLidImage fwImage_32gb(bpm_32gb_fw->vAddr, + bpm_32gb_fw->size); + + BPM::BpmConfigLidImage configImage_16gb(bpm_16gb_config->vAddr, + bpm_16gb_config->size); + + BPM::BpmConfigLidImage configImage_32gb(bpm_32gb_config->vAddr, + bpm_32gb_config->size); + + BPM::runBpmUpdates(&NVDIMM_BPM_16GB_list, + &NVDIMM_BPM_32GB_list, + &fwImage_16gb, + &fwImage_32gb, + &configImage_16gb, + &configImage_32gb); + // Destructor automatically unloads the NVDIMM flash binary } else @@ -1987,6 +2462,16 @@ bool NvdimmsUpdate::runUpdate(void) } } while (0); // end of flash update section + // Clean up the pointers used in v_NVDIMM_16GB_list and v_NVDIMM_32GB_list + for (const auto& pInstalledImage : v_NVDIMM_16GB_list) + { + delete pInstalledImage; + } + for (const auto& pInstalledImage : v_NVDIMM_32GB_list) + { + delete 
pInstalledImage; + } + return o_no_error_found; } @@ -2001,7 +2486,7 @@ errlHndl_t NvdimmsUpdate::isUpdateNeeded(bool & o_update_needed, uint32_t curType = INVALID_TYPE; do { - const TARGETING::Target * l_dimm = i_cur_image->getNvdimmTarget(); + TARGETING::Target * l_dimm = i_cur_image->getNvdimmTarget(); // check Types match (same manufacturer and product) lidType = i_lid_image->getType(); @@ -2038,7 +2523,7 @@ errlHndl_t NvdimmsUpdate::isUpdateNeeded(bool & o_update_needed, "isUpdateNeeded(): non-updatable SMART NVDIMM 0x%.8X " "(0x%04X)", TARGETING::get_huid(l_dimm), le16toh(curVersion)); - /* + /*@ *@errortype *@reasoncode NVDIMM_UPDATE_NOT_SUPPORTED *@moduleid NVDIMM_IS_UPDATE_NEEDED @@ -2046,9 +2531,9 @@ errlHndl_t NvdimmsUpdate::isUpdateNeeded(bool & o_update_needed, *@userdata1[32:63] NVDIMM Target Huid *@userdata2 NVDIMM type (manufacturer and product) *@devdesc Unable to update an NVDIMM at this code level - *@custdesc NVDIMM not updated + *@custdesc Unsupported level of NVDIMM hardware */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, NVDIMM_IS_UPDATE_NEEDED, NVDIMM_UPDATE_NOT_SUPPORTED, TWO_UINT32_TO_UINT64( @@ -2057,9 +2542,15 @@ errlHndl_t NvdimmsUpdate::isUpdateNeeded(bool & o_update_needed, curType, ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); l_err->collectTrace( NVDIMM_UPD, 256 ); + nvdimmAddVendorLog(const_cast<TARGETING::Target*>(l_dimm), + l_err); + l_err->addHwCallout( l_dimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Fatal); l_err->addPartCallout( l_dimm, HWAS::NV_CONTROLLER_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH ); + HWAS::SRCI_PRIORITY_MED ); l_err->addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); break; diff --git a/src/usr/isteps/nvdimm/nvdimm_update.H b/src/usr/isteps/nvdimm/nvdimm_update.H index 37153b9c2..3f71dff56 100644 --- a/src/usr/isteps/nvdimm/nvdimm_update.H +++ b/src/usr/isteps/nvdimm/nvdimm_update.H @@ -5,7 
+5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -42,6 +42,7 @@ const uint16_t INVALID_ID = 0xFFFF; const uint16_t INVALID_VERSION = 0xFFFF; const uint16_t INVALID_TIMEOUT = 0xFFFF; const uint32_t INVALID_TYPE = 0xFFFFFFFF; +const uint8_t INVALID_BLOCK_SIZE = 0x00; // Type is combination of manufacturer id and product id const uint32_t SMART_NVDIMM_16GB_TYPE = 0x01945377; @@ -56,8 +57,13 @@ const uint32_t NVDIMM_SIGNATURE_LIDID = 0x80D00025; // ignore this one const uint32_t NVDIMM_16GB_LIDID = 0x81e00640; const uint32_t NVDIMM_32GB_LIDID = 0x81e00641; +const uint32_t NVDIMM_16GB_BPM_FW_LIDID = 0x81e00642; +const uint32_t NVDIMM_16GB_BPM_CONFIG_LIDID = 0x81e00644; + +const uint32_t NVDIMM_32GB_BPM_FW_LIDID = 0x81e00643; +const uint32_t NVDIMM_32GB_BPM_CONFIG_LIDID = 0x81e00645; + -// Firmware Update Mode settings for FIRMWARE_OPS_CMD enum fw_update_mode : uint8_t { FW_UPDATE_MODE_DISABLED = 0x00, @@ -215,15 +221,40 @@ class NvdimmInstalledImage const bool i_force_recollect = false); /** + * @brief Read the current slot that is running + * @param o_slot - 0 or 1 + * @return error if read operation fails + */ + errlHndl_t getRunningSlot(uint8_t & o_slot); + + /** * @brief Accessor to grab the current NVDIMM target * @return NVDIMM target */ - const TARGETING::Target * getNvdimmTarget(void) + TARGETING::Target * getNvdimmTarget(void) { return iv_dimm; } /** + * @brief Accessor to grab the amount of retries it took to write regions + * @return Cumulative total region write retries + */ + uint8_t getRegionWriteRetries(void) + { + return iv_region_write_retries; + } + + /** + * @brief Accessor for what write size is supported for this installed nvdimm + * Prior to level 0x3A, only word size supported + * Level 0x3A and beyond support 32 byte block writes + * @param[out] maximum number of bytes allowed 
per write + * @return block write size supported for this current nvdimm level + */ + errlHndl_t getBlockWriteSizeSupported(uint64_t & o_blockSize); + + /** * @brief Update the current NV Controller * @param Update using this image * @return error pointer if failure to update, else nullptr @@ -250,6 +281,16 @@ class NvdimmInstalledImage // maximum blocks allowed per region (REGION_BLOCK_SIZE) uint8_t iv_max_blocks_per_region; + // set to true when doing update + bool iv_fw_update_mode_enabled; + + // retry attempts for all regions + uint8_t iv_region_write_retries; + + // what size block can be written (2 or 32 byte) + uint64_t iv_blockSizeSupported; + + // Helper functions for updating the installed lid /** * @brief Transfer a region of bytes in multiple 32-byte blocks @@ -318,13 +359,6 @@ class NvdimmInstalledImage errlHndl_t isFwOpsSuccess(bool & o_success); /** - * @brief Reset NV controller. Resets controller and waits for it to - * come back online - * @return error if reset failed, else nullptr - */ - errlHndl_t resetController(); - - /** * @brief Updates the NV controller with the lid's image data * (minus header and signature) * @param i_lidImage - lid object with image data diff --git a/src/usr/isteps/nvdimm/nvdimmdd.C b/src/usr/isteps/nvdimm/nvdimmdd.C index 730fe0271..044be454b 100755 --- a/src/usr/isteps/nvdimm/nvdimmdd.C +++ b/src/usr/isteps/nvdimm/nvdimmdd.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2019 */ +/* Contributors Listed Below - COPYRIGHT 2011,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -35,6 +35,7 @@ // Includes // ---------------------------------------------- #include <string.h> +#include <time.h> #include <sys/time.h> #include <trace/interface.H> #include <errl/errlentry.H> @@ -67,16 +68,18 @@ TRAC_INIT( & g_trac_nvdimmr, "NVDIMMR", KILOBYTE ); // Easy macro replace for unit testing -#define TRACUCOMP(args...) 
TRACFCOMP(args) -//#define TRACUCOMP(args...) +//#define TRACUCOMP(args...) TRACFCOMP(args) +#define TRACUCOMP(args...) // ---------------------------------------------- // Defines // ---------------------------------------------- #define MAX_BYTE_ADDR 2 #define NVDIMM_MAX_RETRIES 2 +#define MAX_READ_RETRY_SECS 30 // ---------------------------------------------- +using namespace TARGETING; namespace { @@ -95,13 +98,83 @@ static bool errorIsRetryable(uint16_t reasonCode) namespace NVDIMM { -// Register the perform Op with the routing code for DIMMs. +// Register the perform Op router with the routing code for DIMMs. DEVICE_REGISTER_ROUTE( DeviceFW::WILDCARD, DeviceFW::NVDIMM, TARGETING::TYPE_DIMM, + nvdimmPerformOpRouter ); + +// Register the perform Op with the routing code for DIMMs. +DEVICE_REGISTER_ROUTE( DeviceFW::WILDCARD, + DeviceFW::NVDIMM_RAW, + TARGETING::TYPE_DIMM, nvdimmPerformOp ); // ------------------------------------------------------------------ +// nvdimmPerformOpRouter +// ------------------------------------------------------------------ +errlHndl_t nvdimmPerformOpRouter( DeviceFW::OperationType i_opType, + TARGETING::Target * i_target, + void * io_buffer, + size_t & io_buflen, + int64_t i_accessType, + va_list i_args ) +{ + errlHndl_t l_err(nullptr); + + TRACDCOMP( g_trac_nvdimm, + ENTER_MRK"nvdimmPerformOpRouter()" ); + + // Get the NVDIMM register's address, where data will be accessed from + // Although the data is being retrieved as a 64 bit value + // it is really a 16 bit value. Data passed via an arg list + // are retrieved in 64 bit chunks. 
+ uint16_t l_registerAddress = + static_cast<uint16_t>(va_arg(i_args, uint64_t)); + + // Get a handle to the data buffer for easy referencing + uint8_t* l_data = static_cast<uint8_t*>(io_buffer); + + TRACUCOMP(g_trac_nvdimm, INFO_MRK"nvdimmPerformOpRouter(): " + "operation type=%d, target HUID=0x%.8X, access type=%d, " + "buffer length=%d, buffer data=0x%.8X, register address=0x%.8X", + static_cast<uint64_t>(i_opType), get_huid(i_target), i_accessType, + io_buflen, *l_data, l_registerAddress); + + // Make the right read/write call based on operation type + if( i_opType == DeviceFW::READ ) + { + l_err = nvdimmReadReg( i_target, + l_registerAddress, + *l_data, + PAGE_VERIFY); + if (!l_err) + { + TRACUCOMP (g_trac_nvdimm, INFO_MRK"nvdimmPerformOpRouter(): " + "Read data(0x%X) from register(0x%X)", + *l_data, l_registerAddress); + } + } + else if( i_opType == DeviceFW::WRITE ) + { + TRACUCOMP (g_trac_nvdimm, INFO_MRK"nvdimmPerformOpRouter(): " + "Writing data(0x%X) to register(0x%X) ...", + *l_data, l_registerAddress); + + l_err = nvdimmWriteReg( i_target, + l_registerAddress, + *l_data, + PAGE_VERIFY); + } + + TRACDCOMP(g_trac_nvdimm, + EXIT_MRK"nvdimmPerformOpRouter() returning with %s", + (l_err == nullptr ? 
"no error, success" : "an error, failure") ); + + return l_err; +} + +// ------------------------------------------------------------------ // nvdimmPerformOp // ------------------------------------------------------------------ errlHndl_t nvdimmPerformOp( DeviceFW::OperationType i_opType, @@ -116,6 +189,7 @@ errlHndl_t nvdimmPerformOp( DeviceFW::OperationType i_opType, nvdimm_addr_t i2cInfo; i2cInfo.offset = va_arg( i_args, uint64_t ); + i2cInfo.blockSize = va_arg( i_args, uint64_t ); TRACDCOMP( g_trac_nvdimm, ENTER_MRK"nvdimmPerformOp()" ); @@ -206,7 +280,7 @@ errlHndl_t nvdimmPerformOp( DeviceFW::OperationType i_opType, l_currentOpLen = l_snglChipSize - i2cInfo.offset; } - TRACFCOMP( g_trac_nvdimm, + TRACUCOMP( g_trac_nvdimm, "nvdimmPerformOp(): i_opType=%d " "e/p/dA=%d/%d/0x%X, offset=0x%X, len=0x%X, " "snglChipKB=0x%X, chipCount=0x%X, devSizeKB=0x%X", i_opType, @@ -216,7 +290,7 @@ errlHndl_t nvdimmPerformOp( DeviceFW::OperationType i_opType, // Printing mux info separately, if combined, nothing is displayed char* l_muxPath = i2cInfo.i2cMuxPath.toString(); - TRACFCOMP(g_trac_nvdimm, "nvdimmPerformOp(): " + TRACUCOMP(g_trac_nvdimm, "nvdimmPerformOp(): " "muxSelector=0x%X, muxPath=%s", i2cInfo.i2cMuxBusSelector, l_muxPath); @@ -326,7 +400,7 @@ errlHndl_t crossesNvdimmPageBoundary( uint64_t i_offset, errlHndl_t err = nullptr; - if(i_offset >= NVDIMM_PAGE_SIZE || (i_offset+i_buflen) >= NVDIMM_PAGE_SIZE) + if(i_offset >= NVDIMM_PAGE_SIZE || (i_offset+i_buflen) > NVDIMM_PAGE_SIZE) { TRACFCOMP( g_trac_nvdimm, ERR_MRK"crossesNvdimmPageBoundary() - offset 0x%X, buflen 0x%X" @@ -425,7 +499,7 @@ errlHndl_t nvdimmRead ( TARGETING::Target * i_target, if( err ) { TRACFCOMP(g_trac_nvdimm, - "Failed reading data: original read"); + ERR_MRK"nvdimmRead(): Failed reading data: original read"); break; } @@ -462,12 +536,13 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target, ENTER_MRK"nvdimmReadData()"); do { + timespec_t l_CurTime, l_PrevTime; + 
clock_gettime(CLOCK_MONOTONIC, &l_PrevTime); + int retry = 0; /************************************************************/ /* Attempt read multiple times ONLY on retryable fails */ /************************************************************/ - for (uint8_t retry = 0; - retry <= NVDIMM_MAX_RETRIES; - retry++) + do { // Only write the byte address if we have data to write if( 0 != i_byteAddressSize ) @@ -488,12 +563,10 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target, if( l_err ) { - TRACFCOMP(g_trac_nvdimm, - ERR_MRK"nvdimmReadData(): I2C Read-Offset failed on " - "%d/%d/0x%X, aS=%d", - i_i2cInfo.port, i_i2cInfo.engine, - i_i2cInfo.devAddr, - i_byteAddressSize); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " + "I2C Read-Offset failed on %d/%d/0x%X, aS=%d", + i_i2cInfo.port, i_i2cInfo.engine, + i_i2cInfo.devAddr, i_byteAddressSize); // Printing mux info separately, if combined, nothing is displayed char* l_muxPath = i_i2cInfo.i2cMuxPath.toString(); @@ -526,7 +599,7 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target, if( l_err ) { - TRACFCOMP(g_trac_nvdimm, + TRACUCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReadData(): I2C Read failed on " "%d/%d/0x%0X", i_i2cInfo.port, i_i2cInfo.engine, @@ -534,7 +607,7 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target, // Printing mux info separately, if combined, nothing is displayed char* l_muxPath = i_i2cInfo.i2cMuxPath.toString(); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " + TRACUCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " "muxSelector=0x%X, muxPath=%s", i_i2cInfo.i2cMuxBusSelector, l_muxPath); @@ -567,64 +640,35 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target, else // Handle retryable error { // If op will be attempted again: save log and continue - if ( retry < NVDIMM_MAX_RETRIES ) + // Only save original retryable error + if ( err_retryable == nullptr ) { - // Only save original retryable error - if ( err_retryable == nullptr ) - { - // Save original retryable error - 
err_retryable = l_err; - - TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " - "Retryable Error rc=0x%X, eid=0x%X, tgt=0x%X, " - "retry/MAX=%d/%d. Save error and retry", - err_retryable->reasonCode(), - err_retryable->eid(), - TARGETING::get_huid(i_target), - retry, NVDIMM_MAX_RETRIES); - - err_retryable->collectTrace(NVDIMM_COMP_NAME); - } - else - { - // Add data to original retryable error - TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " - "Another Retryable Error rc=0x%X, eid=0x%X " - "plid=0x%X, tgt=0x%X, retry/MAX=%d/%d. " - "Delete error and retry", - l_err->reasonCode(), l_err->eid(), l_err->plid(), - TARGETING::get_huid(i_target), - retry, NVDIMM_MAX_RETRIES); - - ERRORLOG::ErrlUserDetailsString( - "Another Retryable ERROR found") - .addToLog(err_retryable); - - // Delete this new retryable error - delete l_err; - l_err = nullptr; - } + // Save original retryable error + err_retryable = l_err; + + TRACUCOMP( g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " + "Retryable Error rc=0x%X, eid=0x%X, tgt=0x%X, " + "retry=%d. Save error and retry", + err_retryable->reasonCode(), + err_retryable->eid(), + TARGETING::get_huid(i_target), + retry); - // continue to retry - continue; + err_retryable->collectTrace(NVDIMM_COMP_NAME); } - else // no more retries: trace and break + else { - TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " - "Error rc=0x%X, eid=%d, tgt=0x%X. No More " - "Retries (retry/MAX=%d/%d). 
Returning Error", - l_err->reasonCode(), l_err->eid(), - TARGETING::get_huid(i_target), - retry, NVDIMM_MAX_RETRIES); - - l_err->collectTrace(NVDIMM_COMP_NAME); - - // break from retry loop - break; + // Delete this new retryable error + delete l_err; + l_err = nullptr; } - } + } // retryable error + // update current time + clock_gettime(CLOCK_MONOTONIC, &l_CurTime); + retry++; } // end of retry loop + while( (l_CurTime.tv_sec - l_PrevTime.tv_sec) < MAX_READ_RETRY_SECS ); // Handle saved retryable error, if any if (err_retryable) @@ -641,13 +685,29 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target, .addToLog(err_retryable); errlCommit(err_retryable, NVDIMM_COMP_ID); + + // Add trace of what operation failed for returned error + TRACFCOMP(g_trac_nvdimm, + ERR_MRK"nvdimmReadData(): I2C Read failed on " + "%d/%d/0x%0X", + i_i2cInfo.port, i_i2cInfo.engine, i_i2cInfo.devAddr ); + + // Printing mux info separately, if combined, nothing is displayed + char* l_muxPath = i_i2cInfo.i2cMuxPath.toString(); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReadData(): " + "muxSelector=0x%X, muxPath=%s", + i_i2cInfo.i2cMuxBusSelector, + l_muxPath); + free(l_muxPath); + l_muxPath = nullptr; } else { // Since we eventually succeeded, delete original retryable error - TRACFCOMP(g_trac_nvdimm, "nvdimmReadData(): Op successful, " - "deleting saved retryable err eid=0x%X, plid=0x%X", - err_retryable->eid(), err_retryable->plid()); + TRACUCOMP(g_trac_nvdimm, "nvdimmReadData(): Op successful, " + "after %d retries. 
Deleting saved retryable err eid=" + "0x%X, plid=0x%X", + retry, err_retryable->eid(), err_retryable->plid()); delete err_retryable; err_retryable = nullptr; @@ -676,8 +736,6 @@ errlHndl_t nvdimmWrite ( TARGETING::Target * i_target, size_t byteAddrSize = 0; uint8_t * newBuffer = nullptr; bool needFree = false; - uint32_t data_left = 0; - uint32_t diff_wps = 0; TRACDCOMP( g_trac_nvdimm, ENTER_MRK"nvdimmWrite()" ); @@ -742,6 +800,17 @@ errlHndl_t nvdimmWrite ( TARGETING::Target * i_target, // Setup a max-size buffer of writePageSize size_t newBufLen = i_i2cInfo.writePageSize; + + // Break data into max supported i2c transfer size, if possible + // (speeds up i2c operation) + if ( (i_i2cInfo.blockSize != 0) && + (io_buflen >= i_i2cInfo.blockSize) && + ((io_buflen % i_i2cInfo.blockSize) == 0) ) + { + newBufLen = i_i2cInfo.blockSize; + } + assert(newBufLen > 0, "Unable to allocate 0 buffer size for nvdimmWrite()"); + newBuffer = static_cast<uint8_t*>(malloc( newBufLen )); needFree = true; @@ -756,16 +825,6 @@ errlHndl_t nvdimmWrite ( TARGETING::Target * i_target, while( total_bytes_written < io_buflen ) { - // Determine how much data can be written in this loop - // Can't go over a writePageSize boundary - - // Total data left to write - data_left = io_buflen - total_bytes_written; - - // Difference to next writePageSize boundary - diff_wps = i_i2cInfo.writePageSize - - (i_i2cInfo.offset % i_i2cInfo.writePageSize); - // Add the data the user wanted to write memcpy( newBuffer, &l_data_ptr[total_bytes_written], @@ -785,15 +844,13 @@ errlHndl_t nvdimmWrite ( TARGETING::Target * i_target, } TRACUCOMP(g_trac_nvdimm,"nvdimmWrite() Loop: %d/%d/0x%X " - "writeBuflen=%d, offset=0x%X, " - "bAS=%d, diffs=%d/%d", + "writeBuflen=%d, offset=0x%X, bAS=%d", i_i2cInfo.port, i_i2cInfo.engine, i_i2cInfo.devAddr, - newBufLen, i_i2cInfo.offset, byteAddrSize, - data_left, diff_wps); + newBufLen, i_i2cInfo.offset, byteAddrSize); // Printing mux info separately, if combined, nothing is 
displayed char* l_muxPath = i_i2cInfo.i2cMuxPath.toString(); - TRACFCOMP(g_trac_nvdimm, "nvdimmWrite(): " + TRACUCOMP(g_trac_nvdimm, "nvdimmWrite(): " "muxSelector=0x%X, muxPath=%s", i_i2cInfo.i2cMuxBusSelector, l_muxPath); @@ -815,6 +872,14 @@ errlHndl_t nvdimmWrite ( TARGETING::Target * i_target, // for this loop TRACFCOMP(g_trac_nvdimm, "Failed writing data: original nvdimm write"); + // total writes for the data size (divide by each write size) + size_t totalWritesNeeded = io_buflen/newBufLen; + // current write number (writes done + next one) + size_t currentWrite = total_bytes_written/newBufLen + 1; + TRACFCOMP( g_trac_nvdimm,ERR_MRK"nvdimmWrite(): " + "Tried to write out %d bytes out of %d total: " + "Failed on the %d of %d writes", newBufLen, io_buflen, + currentWrite, totalWritesNeeded ); break; } @@ -842,7 +907,7 @@ errlHndl_t nvdimmWrite ( TARGETING::Target * i_target, io_buflen = total_bytes_written; - TRACSCOMP( g_trac_nvdimmr, + TRACUCOMP( g_trac_nvdimmr, "NVDIMM WRITE END : Offset %.2X : Len %d", i_i2cInfo.offset, io_buflen ); } while( 0 ); @@ -874,30 +939,70 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, ENTER_MRK"nvdimmWriteData()"); errlHndl_t err = nullptr; errlHndl_t err_retryable = nullptr; + size_t data_length; + do { - /***********************************************************/ - /* Attempt write multiple times ONLY on retryable fails */ - /***********************************************************/ - for (uint8_t retry = 0; - retry <= NVDIMM_MAX_RETRIES; - retry++) - { - // Do the actual data write - err = deviceOp( DeviceFW::WRITE, - i_target, - i_dataToWrite, - i_dataLen, - DEVICE_I2C_ADDRESS_OFFSET( - i_i2cInfo.port, - i_i2cInfo.engine, - i_i2cInfo.devAddr, - i_byteAddressSize, - reinterpret_cast<uint8_t*>( - i_byteAddress), - i_i2cInfo.i2cMuxBusSelector, - &(i_i2cInfo.i2cMuxPath) )); + /***********************************************************/ + /* Attempt write multiple times ONLY on retryable fails */ + 
/***********************************************************/ + for ( uint8_t retry = 0; retry <= NVDIMM_MAX_RETRIES; retry++) + { + // use a temporary variable to allow retry as the + // data_length could be altered by deviceOp() failure + data_length = i_dataLen; + // Do the actual data write + if ( i_dataLen == sizeof(uint16_t) ) + { + err = deviceOp( DeviceFW::WRITE, + i_target, + i_dataToWrite, + data_length, + DeviceFW::I2C, + I2C_SMBUS_RW_W_CMD_PARAMS( + DeviceFW::I2C_SMBUS_WORD_NO_PEC, + i_i2cInfo.engine, + i_i2cInfo.port, + i_i2cInfo.devAddr, + *(reinterpret_cast<uint8_t*>(i_byteAddress) + + (i_byteAddressSize-1)), + i_i2cInfo.i2cMuxBusSelector, + &(i_i2cInfo.i2cMuxPath)) ); + } + else if ( i_dataLen == 32 ) + { + err = deviceOp( DeviceFW::WRITE, + i_target, + i_dataToWrite, + data_length, + DeviceFW::I2C, + I2C_SMBUS_RW_W_CMD_PARAMS( + DeviceFW::I2C_SMBUS_BLOCK_NO_BYTE_COUNT, + i_i2cInfo.engine, + i_i2cInfo.port, + i_i2cInfo.devAddr, + *(reinterpret_cast<uint8_t*>(i_byteAddress) + + (i_byteAddressSize-1)), + i_i2cInfo.i2cMuxBusSelector, + &(i_i2cInfo.i2cMuxPath)) ); + } + else + { + err = deviceOp( DeviceFW::WRITE, + i_target, + i_dataToWrite, + data_length, + DEVICE_I2C_ADDRESS_OFFSET( + i_i2cInfo.port, + i_i2cInfo.engine, + i_i2cInfo.devAddr, + i_byteAddressSize, + reinterpret_cast<uint8_t*>( + i_byteAddress), + i_i2cInfo.i2cMuxBusSelector, + &(i_i2cInfo.i2cMuxPath) )); + } if ( err == nullptr ) { // Operation completed successfully @@ -911,7 +1016,7 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, "Write Non-Retryable fail %d/%d/0x%X, " "ldl=%d, offset=0x%X, aS=%d, retry=%d", i_i2cInfo.port, i_i2cInfo.engine, - i_i2cInfo.devAddr, i_dataLen, + i_i2cInfo.devAddr, data_length, i_i2cInfo.offset, i_i2cInfo.addrSize, retry); // Printing mux info separately, if combined, nothing is displayed @@ -930,7 +1035,7 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, } else // Handle retryable error { - TRACFCOMP(g_trac_nvdimm, 
ERR_MRK"nvdimmWriteData(): I2C " + TRACUCOMP(g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): I2C " "Write retryable fail %d/%d/0x%X, " "ldl=%d, offset=0x%X, aS=%d, writePageSize = %x", i_i2cInfo.port, i_i2cInfo.engine, @@ -940,7 +1045,7 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, // Printing mux info separately, if combined, nothing is displayed char* l_muxPath = i_i2cInfo.i2cMuxPath.toString(); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): " + TRACUCOMP(g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): " "muxSelector=0x%X, muxPath=%s", i_i2cInfo.i2cMuxBusSelector, l_muxPath); @@ -956,7 +1061,7 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, // Save original retryable error err_retryable = err; - TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): " + TRACUCOMP( g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): " "Error rc=0x%X, eid=0x%X plid=0x%X, " "tgt=0x%X, retry/MAX=%d/%d. Save error " "and retry", @@ -971,7 +1076,7 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, else { // Add data to original retryable error - TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): " + TRACUCOMP( g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): " "Another Retryable Error rc=0x%X, eid=0x%X " "plid=0x%X, tgt=0x%X, retry/MAX=%d/%d. 
" "Delete error and retry", @@ -1016,6 +1121,24 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, { if (err) { + // Trace failure write parameters + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): I2C " + "Write retryable fail %d/%d/0x%X, " + "ldl=%d, offset=0x%X, aS=%d, writePageSize = %x", + i_i2cInfo.port, i_i2cInfo.engine, + i_i2cInfo.devAddr, i_dataLen, + i_i2cInfo.offset, i_i2cInfo.addrSize, + i_i2cInfo.writePageSize); + + // Printing mux info separately, if combined, nothing is displayed + char* l_muxPath = i_i2cInfo.i2cMuxPath.toString(); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmWriteData(): " + "muxSelector=0x%X, muxPath=%s", + i_i2cInfo.i2cMuxBusSelector, + l_muxPath); + free(l_muxPath); + l_muxPath = nullptr; + // commit original retryable error with new err PLID err_retryable->plid(err->plid()); TRACFCOMP(g_trac_nvdimm, "nvdimmWriteData(): Committing saved " @@ -1031,7 +1154,7 @@ errlHndl_t nvdimmWriteData( TARGETING::Target * i_target, else { // Since we eventually succeeded, delete original retryable error - TRACFCOMP(g_trac_nvdimm, "nvdimmWriteData(): Op successful, " + TRACUCOMP(g_trac_nvdimm, "nvdimmWriteData(): Op successful, " "deleting saved retryable err eid=0x%X, plid=0x%X", err_retryable->eid(), err_retryable->plid()); @@ -1221,7 +1344,7 @@ errlHndl_t nvdimmReadAttributes ( TARGETING::Target * i_target, // Printing mux info separately, if combined, nothing is displayed char* l_muxPath = o_i2cInfo.i2cMuxPath.toString(); - TRACFCOMP(g_trac_nvdimm, "nvdimmReadAttributes(): " + TRACUCOMP(g_trac_nvdimm, "nvdimmReadAttributes(): " "muxSelector=0x%X, muxPath=%s", o_i2cInfo.i2cMuxBusSelector, l_muxPath); @@ -1481,4 +1604,175 @@ void getNVDIMMs( std::list<EEPROM::EepromInfo_t>& o_info ) o_info.size()); } +/** + * @brief Helper structure to keep track of memory ranges + */ +typedef struct memGroups_t +{ + Target* proc; + uint64_t membottom; + uint64_t memtop; + size_t group; +} memGroups_t; + +/** + * @brief Comparator for memGroups_t to 
allow sorting, sorts big-to-small + * @param[in] Left-side of compare + * @param[in] Right-side of compare + * @return true:left-side is bigger, false:right-side is bigger + */ +bool compare_memGroups(memGroups_t& i_ls, + memGroups_t& i_rs) +{ + return (i_ls.memtop > i_rs.memtop); +} + +/** + * @brief Check if given address is owned by nvdimms and return + * a new address that isn't if it was + */ +uint64_t get_top_addr_with_no_nvdimms( uint64_t i_topAddr ) +{ + // Default to just returning the same value we got (no nvdimms) + uint64_t o_topAddr = i_topAddr; + + // On a NVDIMM system we need to make sure that we don't + // use the NV memory for the HOMER (or other reserved + // memory). Depending on the specific memory layout + // the NV memory could be placed at the top of memory + // where we would normally land. + + // NVDIMMs are only on Nimbus systems + if( TARGETING::MODEL_NIMBUS + !=TARGETING::targetService().getProcessorModel() ) + { + return o_topAddr; + } + + // Skip all of this checking if the input value is weird + if( i_topAddr == 0 ) + { + return o_topAddr; + } + + // Build up a list of possible memory ranges + std::vector<memGroups_t> l_memGroups; + + ATTR_PROC_MEM_BASES_type l_memBases = {0}; + ATTR_PROC_MEM_SIZES_type l_memSizes = {0}; + const size_t l_numGroups = sizeof(ATTR_PROC_MEM_SIZES_type) + /sizeof(l_memSizes[0]); + + TARGETING::TargetHandleList l_procList; + TARGETING::getAllChips(l_procList, TARGETING::TYPE_PROC); + assert(l_procList.size() != 0, "Empty proc list returned!"); + for (auto l_pProc : l_procList) + { + // Get the memory group ranges under this proc + assert(l_pProc->tryGetAttr<ATTR_PROC_MEM_BASES>(l_memBases), + "Unable to get ATTR_PROC_MEM_BASES attribute"); + assert(l_pProc->tryGetAttr<ATTR_PROC_MEM_SIZES>(l_memSizes), + "Unable to get ATTR_PROC_MEM_SIZES attribute"); + + for (size_t l_grp=0; l_grp < l_numGroups; l_grp++) + { + // Non-zero size means that there is memory present + if (l_memSizes[l_grp]) + { + memGroups_t 
l_mg; + l_mg.proc = l_pProc; + l_mg.membottom = l_memBases[l_grp]; + l_mg.memtop = l_memBases[l_grp] + l_memSizes[l_grp]; + l_mg.group = l_grp; + l_memGroups.push_back(l_mg); + } + } + } + + + // Loop through the groups from biggest to smallest + // l_top_homer_addr should hit the biggest one first, then we'll + // find the next biggest if the first match has a nvdimm in it. + std::sort( l_memGroups.begin(), l_memGroups.end(), compare_memGroups ); + for( auto l_memGroup : l_memGroups ) + { + bool l_foundNvdimm = false; + + // Get the array of mcas/group from the attribute + // The attr contains 8 8-bit entries, one entry per group + // The bits specify which mcas are included in the group + ATTR_MSS_MEM_MC_IN_GROUP_type l_memMcGroup = {0}; + assert(l_memGroup.proc->tryGetAttr<ATTR_MSS_MEM_MC_IN_GROUP> + (l_memMcGroup), + "Unable to get ATTR_MSS_MEM_MC_IN_GROUP attribute"); + + // Get list of mcas under this proc + TargetHandleList l_mcaList; + getChildAffinityTargets( l_mcaList, + l_memGroup.proc, + CLASS_UNIT, + TYPE_MCA ); + + // Loop through the mcas on this proc + for (const auto & l_mcaTarget : l_mcaList) + { + // Get the chip unit for this mca + ATTR_CHIP_UNIT_type l_mcaUnit = 0; + l_mcaUnit = l_mcaTarget->getAttr<ATTR_CHIP_UNIT>(); + + // Check if this mca is included in the memory group + const uint8_t l_mcMask = 0x80; + if (l_memMcGroup[l_memGroup.group] & (l_mcMask >> l_mcaUnit)) + { + // Get the list of dimms under this mca + TargetHandleList l_dimmList; + getChildAffinityTargets( l_dimmList, + l_mcaTarget, + CLASS_NA, + TYPE_DIMM ); + for (const auto & l_dimmTarget : l_dimmList) + { + if( isNVDIMM(l_dimmTarget) ) + { + l_foundNvdimm = true; + break; + } + } + if( l_foundNvdimm ) { break; } + } + } // for all MCAs + + // If we didn't find a nvdimm, we have a candidate for a valid + // top address + if( l_foundNvdimm ) + { + // Check if top addr is in this group's memory range + if( (o_topAddr >= l_memGroup.membottom) && + (o_topAddr <= l_memGroup.memtop) 
) + { + TRACFCOMP(g_trac_nvdimm,"get_top_addr_with_no_nvdimms> Chosen address 0x%llX has nvdimms, cannot be used", + o_topAddr); + o_topAddr = 0; + } + } + else + { + // Since we are sorted by size, this must be the + // largest group without a nvdimm + if( o_topAddr != l_memGroup.memtop ) + { + o_topAddr = l_memGroup.memtop; + TRACFCOMP(g_trac_nvdimm,"get_top_addr_with_no_nvdimms> Choosing address 0x%llX as new top", + o_topAddr); + break; + } + } + } //for all memgroups + + assert( o_topAddr != 0, "get_top_addr_with_no_nvdimms> No valid memory group found without a NVDIMM" ); + + return o_topAddr; +} + + } // end namespace NVDIMM diff --git a/src/usr/isteps/nvdimm/nvdimmdd.H b/src/usr/isteps/nvdimm/nvdimmdd.H index 4d599b38a..88bc388c0 100755 --- a/src/usr/isteps/nvdimm/nvdimmdd.H +++ b/src/usr/isteps/nvdimm/nvdimmdd.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2019 */ +/* Contributors Listed Below - COPYRIGHT 2019,2020 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -69,6 +69,7 @@ struct nvdimm_addr_t uint64_t devSize_KB; // in kilobytes uint64_t chipCount; // number of chips making up nvdimm device uint64_t writeCycleTime; // in milliseconds + uint8_t blockSize; // size of write block supported for this nvdimm uint8_t i2cMuxBusSelector; TARGETING::EntityPath i2cMuxPath; @@ -86,12 +87,90 @@ struct nvdimm_addr_t devSize_KB(0), chipCount(0), writeCycleTime(0), + blockSize(0), i2cMuxBusSelector(I2C_MUX::NOT_APPLICABLE), i2cMuxPath() { } }; +/** + * @brief Structure of registers for error log traces + */ +struct nvdimm_reg_t +{ + uint8_t Module_Health; + uint8_t Module_Health_Status0; + uint8_t Module_Health_Status1; + uint8_t CSave_Status; + uint8_t CSave_Info; + uint8_t CSave_Fail_Info0; + uint8_t CSave_Fail_Info1; + uint8_t CSave_Timeout0; + uint8_t CSave_Timeout1; + uint8_t Error_Threshold_Status; + uint8_t NVDimm_Ready; + uint8_t NVDimm_CMD_Status0; + uint8_t Erase_Status; + uint8_t Erase_Fail_Info; + uint8_t Erase_Timeout0; + uint8_t Erase_Timeout1; + uint8_t Abort_CMD_Timeout; + uint8_t Set_ES_Policy_Status; + uint8_t Restore_Status; + uint8_t Restore_Fail_Info; + uint8_t Restore_Timeout0; + uint8_t Restore_Timeout1; + uint8_t Arm_Status; + uint8_t Arm_Fail_Info; + uint8_t Arm_Timeout0; + uint8_t Arm_Timeout1; + uint8_t Set_Event_Notification_Status; + uint8_t Encryption_Config_Status; + + /** + * @brief Construct a default nvdimm_reg_t + */ + nvdimm_reg_t() + : Module_Health(0), + Module_Health_Status0(0), + Module_Health_Status1(0), + CSave_Status(0), + CSave_Info(0), + CSave_Fail_Info0(0), + CSave_Fail_Info1(0), + CSave_Timeout0(0), + CSave_Timeout1(0), + Error_Threshold_Status(0), + NVDimm_Ready(0), + NVDimm_CMD_Status0(0), + Erase_Status(0), + Erase_Fail_Info(0), + Erase_Timeout0(0), + Erase_Timeout1(0), + Abort_CMD_Timeout(0), + Set_ES_Policy_Status(0), + Restore_Status(0), + Restore_Fail_Info(0), + Restore_Timeout0(0), + Restore_Timeout1(0), + Arm_Status(0), + Arm_Fail_Info(0), + Arm_Timeout0(0), + 
Arm_Timeout1(0), + Set_Event_Notification_Status(0), + Encryption_Config_Status(0) + { + } + + /** + * @brief Default destructor of nvdimm_reg_t + */ + ~nvdimm_reg_t() = default; + +}; + + /* * @brief Miscellaneous enums for NVDIMM */ @@ -138,6 +217,49 @@ errlHndl_t nvdimmPerformOp( DeviceFW::OperationType i_opType, int64_t i_accessType, va_list i_args ); +/** +* +* @brief Route the read/write operation (i_opType) to the correct +* nvdimmReadReg/nvdimmWriteReg call. +* +* @details This is essentially a wrapper around the nvdimmPerformOp method +* which is called via the nvdimmReadReg/nvdimmWriteReg call. This +* ensures that the page is set correctly whenever a NVDIMM register +* is accessed. +* +* @param[in] i_opType - Operation Type - See DeviceFW::OperationType in +* driververif.H +* +* @param[in] i_target - Target device. +* +* @param[in/out] io_buffer +* INPUT: Pointer to the data that will be written to the target +* device. +* OUTPUT: Pointer to the data that was read from the target device. +* +* @param[in/out] io_buflen +* INPUT: Length of the buffer to be written to target device. +* OUTPUT: Length of buffer that was written, or length of buffer +* to be read from target device. +* +* @param [in] i_accessType - Access Type - See DeviceFW::AccessType in +* usrif.H +* +* @param [in] i_args - This is an argument list for the device driver +* framework. This argument list consists of the internal offset +* to use on the slave I2C device. +* +* @return errlHndl_t - NULL if successful, otherwise a pointer to the +* error log. +* +*/ +errlHndl_t nvdimmPerformOpRouter( DeviceFW::OperationType i_opType, + TARGETING::Target * i_target, + void * io_buffer, + size_t & io_buflen, + int64_t i_accessType, + va_list i_args ); + /* * @brief On the NV Controller, the page is selected by writing to offset * 0x00 with the page you would like to switch to. e.g. 
to activate diff --git a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H index 460add6f3..fdd94e01d 100644 --- a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H +++ b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H @@ -7,6 +7,7 @@ /* */ /* Contributors Listed Below - COPYRIGHT 2014,2019 */ /* [+] International Business Machines Corp. */ +/* [+] YADRO */ /* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ @@ -164,6 +165,84 @@ private: UdParserNvdimmParms & operator=(const UdParserNvdimmParms&); }; +/** + * @class UdParserNvdimmOPParms + * + * Parses UdNvdimmOPParms + */ +class UdParserNvdimmOPParms : public ERRORLOG::ErrlUserDetailsParser +{ +public: + /** + * @brief Constructor + */ + UdParserNvdimmOPParms() {} + + /** + * @brief Destructor + */ + virtual ~UdParserNvdimmOPParms() = default; + + /** + * @brief Parses string user detail data from an error log + * + * @param i_version Version of the data + * @param i_parser ErrlUsrParser object for outputting information + * @param i_pBuffer Pointer to buffer containing detail data + * @param i_buflen Length of the buffer + */ + virtual void parse(errlver_t i_version, + ErrlUsrParser & i_parser, + void * i_pBuffer, + const uint32_t i_buflen) const + { + const uint8_t* l_databuf = static_cast<const uint8_t*>(i_pBuffer); + i_parser.PrintHeading("NVDIMM I2C Register Traces"); + + // Memory Layout (1 byte each) + static const char* l_registers[] = { + "MODULE_HEALTH", + "MODULE_HEALTH_STATUS0", + "MODULE_HEALTH_STATUS1", + "CSAVE_STATUS", + "CSAVE_INFO", + "CSAVE_FAIL_INFO0", + "CSAVE_FAIL_INFO1", + "CSAVE_TIMEOUT_INFO0", + "CSAVE_TIMEOUT_INFO1", + "ERROR_THRESHOLD_STATUS", + "NVDIMM_READY", + "NVDIMM_CMD_STATUS0", + "ERASE_STATUS", + "ERASE_FAIL_INFO", + "ERASE_TIMEOUT0", + "ERASE_TIMEOUT1", + "ABORT_CMD_TIMEOUT", + "SET_ES_POLICY_STATUS", + "RESTORE_STATUS", + "RESTORE_FAIL_INFO", + "RESTORE_TIMEOUT0", + "RESTORE_TIMEOUT1", + "ARM_STATUS", + 
"ARM_FAIL_INFO", + "ARM_TIMEOUT0", + "ARM_TIMEOUT1", + "SET_EVENT_NOTIFICATION_STATUS", + "ENCRYPTION_CONFIG_STATUS" + }; + + for (uint32_t i = 0; i < i_buflen && + i < sizeof(l_registers) / sizeof(l_registers[0]); ++i) + { + i_parser.PrintNumber(l_registers[i], "%02X", l_databuf[i]); + } + } + + // Disabled + UdParserNvdimmOPParms(const UdParserNvdimmOPParms&) = delete; + UdParserNvdimmOPParms & operator=(UdParserNvdimmOPParms &) = delete; +}; + } // end NVDIMM namespace #endif diff --git a/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H b/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H index b27774b13..f208ac060 100644 --- a/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H +++ b/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H @@ -38,14 +38,14 @@ namespace NVDIMM { registerParser<NVDIMM::UdParserNvdimmParms> (NVDIMM_UDT_PARAMETERS); + registerParser<NVDIMM::UdParserNvdimmOPParms> + (NVDIMM_OP_PARAMETERS); } - private: - - UserDetailsParserFactory(const UserDetailsParserFactory &); - UserDetailsParserFactory & operator= - (const UserDetailsParserFactory &); + UserDetailsParserFactory(const UserDetailsParserFactory &) = delete; + UserDetailsParserFactory & operator=(UserDetailsParserFactory &) = delete; }; + }; #endif diff --git a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C index 267fab07c..e8ad1d9e9 100644 --- a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C +++ b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C @@ -25,446 +25,1032 @@ /** * @file nvdimm_rt.C * - * @brief NVDIMM functions only needed for runtime + * @brief NVDIMM functions only needed for runtime. These functions include + * but are not limited to arming/disarming the NVDIMM along with methods + * to poll the arming and check the status of the arming. Checking the + * error state of the NVDIMM, getting a random number with the darn + * instruction and checking the ES or NVM health status. 
*/ + +/// BPM - Backup Power Module + #include <trace/interface.H> #include <errl/errlentry.H> #include <errl/errlmanager.H> +#include <errl/errludstring.H> #include <util/runtime/rt_fwreq_helper.H> #include <targeting/common/attributes.H> #include <targeting/common/commontargeting.H> #include <targeting/common/util.H> #include <targeting/common/utilFilter.H> -#include <usr/runtime/rt_targeting.H> +#include <targeting/runtime/rt_targeting.H> #include <runtime/interface.h> +#include <arch/ppc.H> #include <isteps/nvdimm/nvdimmreasoncodes.H> +#include "../errlud_nvdimm.H" +#include "../nvdimmErrorLog.H" #include <isteps/nvdimm/nvdimm.H> // implements some of these #include "../nvdimm.H" // for g_trac_nvdimm +#include <sys/time.h> //#define TRACUCOMP(args...) TRACFCOMP(args) #define TRACUCOMP(args...) +using namespace TARGETING; +using namespace ERRORLOG; + namespace NVDIMM { +static constexpr uint64_t DARN_ERROR_CODE = 0xFFFFFFFFFFFFFFFFull; +static constexpr uint32_t MAX_DARN_ERRORS = 10; + /** -* @brief Notify PHYP of NVDIMM OCC protection status -*/ -errlHndl_t notifyNvdimmProtectionChange(TARGETING::Target* i_target, - const nvdimm_protection_t i_state) + * @brief Check nvdimm error state + * + * @param[in] i_nvdimm - nvdimm target + * + * @return bool - true if nvdimm is in any error state, false otherwise + */ +bool nvdimmInErrorState(Target *i_nvdimm) { - errlHndl_t l_err = nullptr; + TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmInErrorState() HUID[%X]",get_huid(i_nvdimm)); - // default to send a not protected status - uint64_t l_nvdimm_protection_state = - hostInterfaces::HBRT_FW_NVDIMM_NOT_PROTECTED; + uint8_t l_statusFlag = i_nvdimm->getAttr<ATTR_NV_STATUS_FLAG>(); + bool l_ret = true; - TRACFCOMP( g_trac_nvdimm, ENTER_MRK - "notifyNvdimmProtectionChange: Target huid 0x%.8X, state %d", - get_huid(i_target), i_state); - do + // Just checking bit 1 for now, need to investigate these + // Should be checking NVDIMM_ARMED instead + if ((l_statusFlag & 
NSTD_VAL_ERASED) == 0) { - TARGETING::TargetHandleList l_nvdimmTargetList = - TARGETING::getProcNVDIMMs(i_target); + l_ret = false; + } - // Only send command if the processor has an NVDIMM under it - if (l_nvdimmTargetList.empty()) + // Also check the encryption error status + Target* l_sys = nullptr; + targetService().getTopLevelTarget( l_sys ); + assert(l_sys, "nvdimmInErrorState: no TopLevelTarget"); + if (l_sys->getAttr<ATTR_NVDIMM_ENCRYPTION_ENABLE>()) + { + ATTR_NVDIMM_ARMED_type l_armed_state = {}; + l_armed_state = i_nvdimm->getAttr<ATTR_NVDIMM_ARMED>(); + if (l_armed_state.encryption_error_detected) { - TRACFCOMP( g_trac_nvdimm, - "notifyNvdimmProtectionChange: No NVDIMM found under processor 0x%.8X", - get_huid(i_target)); - break; + l_ret = true; } + } + + TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmInErrorState() HUID[%X]",get_huid(i_nvdimm)); + return l_ret; +} + - TARGETING::ATTR_NVDIMM_ARMED_type l_nvdimm_armed_state = - i_target->getAttr<TARGETING::ATTR_NVDIMM_ARMED>(); +// This could be made a generic utility +errlHndl_t nvdimm_getDarnNumber(size_t i_genSize, uint8_t* o_genData) +{ + assert(i_genSize % sizeof(uint64_t) == 0,"nvdimm_getDarnNumber() bad i_genSize"); - // Only notify protected state if NVDIMM controllers are - // armed and no error was or is detected - if (i_state == NVDIMM::PROTECTED) + errlHndl_t l_err = nullptr; + uint64_t* l_darnData = reinterpret_cast<uint64_t*>(o_genData); + + for (uint32_t l_loop = 0; l_loop < (i_genSize / sizeof(uint64_t)); l_loop++) + { + // Darn could return an error code + uint32_t l_darnErrors = 0; + + while (l_darnErrors < MAX_DARN_ERRORS) { - // Exit without notifying phyp if in error state - if (l_nvdimm_armed_state.error_detected) + // Get a 64-bit random number with the darn instruction + l_darnData[l_loop] = getDarn(); + + if ( l_darnData[l_loop] != DARN_ERROR_CODE ) { - // State can't go to protected after error is detected break; } - // check if we need to rearm the NVDIMM(s) - else if 
(!l_nvdimm_armed_state.armed) - { - bool nvdimms_armed = - NVDIMM::nvdimmArm(l_nvdimmTargetList); - if (nvdimms_armed) - { - // NVDIMMs are now armed and ready for backup - l_nvdimm_armed_state.armed = 1; - i_target->setAttr<TARGETING::ATTR_NVDIMM_ARMED>(l_nvdimm_armed_state); - - l_nvdimm_protection_state = hostInterfaces::HBRT_FW_NVDIMM_PROTECTED; - } - else - { - // If nvdimm arming failed, - // do NOT post that the dimms are now protected. - - // Remember this error, only try arming once - if (!l_nvdimm_armed_state.error_detected) - { - l_nvdimm_armed_state.error_detected = 1; - i_target->setAttr<TARGETING::ATTR_NVDIMM_ARMED>(l_nvdimm_armed_state); - } - - // Exit without notifying phyp of any protection change - break; - } - } else { - // NVDIMM already armed and no error found - l_nvdimm_protection_state = hostInterfaces::HBRT_FW_NVDIMM_PROTECTED; + l_darnErrors++; } } - else if (i_state == NVDIMM::UNPROTECTED_BECAUSE_ERROR) + + if (l_darnErrors == MAX_DARN_ERRORS) { - // Remember that this NV controller has an error so - // we don't rearm this until next IPL - if (!l_nvdimm_armed_state.error_detected) - { - l_nvdimm_armed_state.error_detected = 1; - i_target->setAttr<TARGETING::ATTR_NVDIMM_ARMED>(l_nvdimm_armed_state); - } - // still notify phyp that NVDIMM is Not Protected + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_getDarnNumber() reached MAX_DARN_ERRORS"); + /*@ + *@errortype + *@reasoncode NVDIMM_ENCRYPTION_MAX_DARN_ERRORS + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_GET_DARN_NUMBER + *@userdata1 MAX_DARN_ERRORS + *@devdesc Error using darn instruction + *@custdesc NVDIMM encryption error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_GET_DARN_NUMBER, + NVDIMM_ENCRYPTION_MAX_DARN_ERRORS, + MAX_DARN_ERRORS, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + break; } + } + return l_err; +} - // Get the Proc Chip Id - RT_TARG::rtChipId_t l_chipId = 0; - l_err = 
RT_TARG::getRtTarget(i_target, l_chipId); - if(l_err) +errlHndl_t nvdimm_getRandom(uint8_t* o_genData) +{ + errlHndl_t l_err = nullptr; + uint8_t l_xtraData[ENC_KEY_SIZE] = {0}; + + do + { + // Get a random number with the darn instruction + l_err = nvdimm_getDarnNumber(ENC_KEY_SIZE, o_genData); + if (l_err) { - TRACFCOMP( g_trac_nvdimm, - ERR_MRK"notifyNvdimmProtectionChange: getRtTarget ERROR" ); break; } - // send the notification msg - if ((nullptr == g_hostInterfaces) || - (nullptr == g_hostInterfaces->firmware_request)) + // Validate and update the random number + // Retry if more randomness required + do { - TRACFCOMP( g_trac_nvdimm, ERR_MRK"notifyNvdimmProtectionChange: " - "Hypervisor firmware_request interface not linked"); + //Get replacement data + l_err = nvdimm_getDarnNumber(ENC_KEY_SIZE, l_xtraData); + if (l_err) + { + break; + } + + }while (nvdimm_keyifyRandomNumber(o_genData, l_xtraData)); + + }while (0); + + return l_err; +} + +/* + * @brief Check the ES (energy source)/backup power module(BPM) health status of + * the individual NVDIMMs supplied in list + * + * @param[in] i_nvdimmTargetList - list of NVDIMMs to check the ES health of + * + * @return false if one or more NVDIMMs fail ES health check, else true + */ +bool nvDimmEsCheckHealthStatus(const TargetHandleList &i_nvdimmTargetList) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmEsCheckHealthStatus(): " + "Target list size(%d)", i_nvdimmTargetList.size()); + + // The minimum ES lifetime value + const uint8_t ES_LIFETIME_MINIMUM_REQUIREMENT = 0x62; // > 97% + + // The ES health check status flags for the different states of an + // ES health check + const uint8_t ES_HEALTH_CHECK_IN_PROGRESS_FLAG = 0x01; // bit 0 + const uint8_t ES_HEALTH_CHECK_SUCCEEDED_FLAG = 0x02; // bit 1 + const uint8_t ES_HEALTH_CHECK_FAILED_FLAG = 0x04; // bit 2 - // need to safely convert struct type into uint32_t - union { - TARGETING::ATTR_NVDIMM_ARMED_type tNvdimmArmed; - uint32_t nvdimmArmed_int; - } 
armed_state_union; - armed_state_union.tNvdimmArmed = l_nvdimm_armed_state; + // Handle to catch any errors + errlHndl_t l_err(nullptr); + + // The ES health check status from an ES health check call + uint8_t l_esHealthCheck(0); + + // Status of the accumulation of all calls related to the ES health check. + // If any one call is bad/fails, then this will be false, else it stays true + bool l_didEsHealthCheckPass(true); + + // Iterate thru the NVDIMMs checking the ES health status of each one. + // Going with the assumption that the caller waited the allotted time, + // roughly 20 to 30 minutes, after the start of an IPL. + // Success case: + // * ES health check initiated at start of the IPL, caller waited the + // allotted time (20 to 30 mins) before doing a health check, health + // check returned success and the lifetime meets the minimum threshold + // for a new BPM. + // Error cases are: + // * ES health check is in progress, will assume BPM is hung + // * ES health check failed + // * ES health check succeeded but lifetime does not meet a + // certain threshold + // * If none of the above apply (success case and other error cases), + // then assume the ES health check was never initiated at the start + // of the IPL + // For each of these error cases do a predictive callout + for (auto const l_nvdimm : i_nvdimmTargetList) + { + // Retrieve the Health Check status from the BPM + TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmEsCheckHealthStatus(): " + "Reading NVDIMM(0x%.8X) ES health check data, " + "register ES_CMD_STATUS0(0x%.2X)", + get_huid(l_nvdimm), ES_CMD_STATUS0); + + l_err = nvdimmReadReg(l_nvdimm, ES_CMD_STATUS0, l_esHealthCheck); + + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): " + "NVDIMM(0x%X) failed to read the ES health check " + "data, register ES_CMD_STATUS0(0x%.2X)", + get_huid(l_nvdimm), ES_CMD_STATUS0); + + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + 
errlCommit(l_err, NVDIMM_COMP_ID); + + // Let the caller know something went amiss + l_didEsHealthCheckPass = false; + + // Proceed to next NVDIMM, better luck next time + continue; + } + + // Trace out the returned data for inspection + TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmEsCheckHealthStatus(): " + "NVDIMM(0x%X) returned value(0x%.2X) from the ES health " + "check data, register ES_CMD_STATUS0(0x%.2X)", + get_huid(l_nvdimm), l_esHealthCheck, ES_CMD_STATUS0); + + if (l_esHealthCheck & ES_HEALTH_CHECK_IN_PROGRESS_FLAG) + { + TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): " + "Assuming caller waited the allotted time before " + "doing an ES health check on NVDIMM(0x%.8X), the BPM " + "is hung doing the ES health check.", + get_huid(l_nvdimm) ); /*@ * @errortype - * @severity ERRL_SEV_PREDICTIVE - * @moduleid NOTIFY_NVDIMM_PROTECTION_CHG - * @reasoncode NVDIMM_NULL_FIRMWARE_REQUEST_PTR - * @userdata1 HUID of processor target - * @userdata2[0:31] Requested protection state - * @userdata2[32:63] Current armed state - * @devdesc Unable to inform PHYP of NVDIMM protection - * @custdesc Internal firmware error + * @severity ERRL_SEV_PREDICTIVE + * @moduleid NVDIMM_ES_HEALTH_CHECK + * @reasoncode NVDIMM_ES_HEALTH_CHECK_IN_PROGRESS_FAILURE + * @userdata1 HUID of NVDIMM target + * @userdata2 ES health check status + * @devdesc Assuming caller waited the allotted time before + * doing an ES health check, then the BPM is hung doing + * the ES health check. + * @custdesc NVDIMM ES health check failed. 
*/ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, - NOTIFY_NVDIMM_PROTECTION_CHG, - NVDIMM_NULL_FIRMWARE_REQUEST_PTR, - get_huid(i_target), - TWO_UINT32_TO_UINT64( - l_nvdimm_protection_state, - armed_state_union.nvdimmArmed_int) - ); - - l_err->addProcedureCallout(HWAS::EPUB_PRC_PHYP_CODE, + l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE, + NVDIMM_ES_HEALTH_CHECK, + NVDIMM_ES_HEALTH_CHECK_IN_PROGRESS_FAILURE, + get_huid(l_nvdimm), + l_esHealthCheck, + ErrlEntry::NO_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); + + // Add a BPM callout + l_err->addPartCallout( l_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(l_nvdimm,l_err); + // Collect the error + errlCommit(l_err, NVDIMM_COMP_ID); + + // Let the caller know something went amiss + l_didEsHealthCheckPass = false; + } + else if (l_esHealthCheck & ES_HEALTH_CHECK_FAILED_FLAG) + { + TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): " + "Assuming caller waited the allotted time before " + "doing an ES health check on NVDIMM(0x%.8X), the BPM " + "reported a failure.", + get_huid(l_nvdimm) ); + + /*@ + * @errortype + * @severity ERRL_SEV_PREDICTIVE + * @moduleid NVDIMM_ES_HEALTH_CHECK + * @reasoncode NVDIMM_ES_HEALTH_CHECK_REPORTED_FAILURE + * @userdata1 HUID of NVDIMM target + * @userdata2 ES health check status + * @devdesc Assuming caller waited the allotted time before + * doing an ES health check, the BPM reported a failure + * while doing an ES health check. + * @custdesc NVDIMM ES health check failed. 
+ */ + l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE, + NVDIMM_ES_HEALTH_CHECK, + NVDIMM_ES_HEALTH_CHECK_REPORTED_FAILURE, + get_huid(l_nvdimm), + l_esHealthCheck, + ErrlEntry::NO_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); + + // Add a BPM callout + l_err->addPartCallout( l_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(l_nvdimm,l_err); + // Collect the error + errlCommit(l_err, NVDIMM_COMP_ID); + + // Let the caller know something went amiss + l_didEsHealthCheckPass = false; + } + else if (l_esHealthCheck & ES_HEALTH_CHECK_SUCCEEDED_FLAG) + { + TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmEsCheckHealthStatus(): " + "Reading NVDIMM(0x%.8X) ES lifetime data, " + "register ES_LIFETIME(0x%.2X)", + get_huid(l_nvdimm), ES_LIFETIME); + + // The lifetime percentage + uint8_t l_lifetimePercentage(0); + + // Retrieve the Lifetime Percentage from the BPM + l_err = nvdimmReadReg(l_nvdimm, ES_LIFETIME, l_lifetimePercentage); + + if (l_err) + { + TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): " + "NVDIMM(0x%.8X) failed to read the " + "ES_LIFETIME(0x%.2X) data", + get_huid(l_nvdimm), + ES_LIFETIME ); + + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit(l_err, NVDIMM_COMP_ID); + + // Let the caller know something went amiss + l_didEsHealthCheckPass = false; + } + else if (l_lifetimePercentage < ES_LIFETIME_MINIMUM_REQUIREMENT) + { + TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): " + "ES health check on NVDIMM(0x%.8X) succeeded but " + "the BPM's lifetime(%d) does not meet the minimum " + "requirement(%d) needed to qualify as a new BPM.", + get_huid(l_nvdimm), + l_lifetimePercentage, + ES_LIFETIME_MINIMUM_REQUIREMENT ); + + /*@ + * @errortype + * @severity ERRL_SEV_PREDICTIVE + * @moduleid NVDIMM_ES_HEALTH_CHECK + * @reasoncode NVDIMM_ES_LIFETIME_MIN_REQ_NOT_MET + * @userdata1[00:31] HUID of NVDIMM target + * 
@userdata1[32:63] ES health check status + * @userdata2[00:31] Retrieved lifetime percentage + * @userdata2[32:63] lifetime minimum requirement + * @devdesc ES health check succeeded but the BPM's + * lifetime does not meet the minimum + * requirement needed to qualify as a + * new BPM. + * @custdesc NVDIMM ES health check failed + */ + l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE, + NVDIMM_ES_HEALTH_CHECK, + NVDIMM_ES_LIFETIME_MIN_REQ_NOT_MET, + TWO_UINT32_TO_UINT64( + get_huid(l_nvdimm), + l_esHealthCheck), + TWO_UINT32_TO_UINT64( + l_lifetimePercentage, + ES_LIFETIME_MINIMUM_REQUIREMENT), + ErrlEntry::NO_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); + + // Add a BPM callout + l_err->addPartCallout( l_nvdimm, + HWAS::BPM_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(l_nvdimm,l_err); + // Collect the error + errlCommit(l_err, NVDIMM_COMP_ID); + + // Let the caller know something went amiss + l_didEsHealthCheckPass = false; + } // end else if (l_lifetimePercentage ... + else + { + TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): " + "Success: ES health check on NVDIMM(0x%.8X) " + "succeeded and the BPM's lifetime(%d) meet's the " + "minimum requirement(%d) needed to qualify as " + "a new BPM.", + get_huid(l_nvdimm), + l_lifetimePercentage, + ES_LIFETIME_MINIMUM_REQUIREMENT ); + } + } // end else if (l_esHealthCheck & ES_HEALTH_CHECK_SUCCEEDED_FLAG) + else // Assume the ES health check was never initiated at + // the start of the IPL. 
+ { + TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): " + "The ES health check on NVDIMM(0x%.8X) shows no status " + "(in progress, fail or succeed) so assuming it was " + "never initiated at the start of the IPL.", + get_huid(l_nvdimm) ); + + /*@ + * @errortype + * @severity ERRL_SEV_PREDICTIVE + * @moduleid NVDIMM_ES_HEALTH_CHECK + * @reasoncode NVDIMM_ES_HEALTH_CHECK_NEVER_INITIATED + * @userdata1 HUID of NVDIMM target + * @userdata2 ES health check status + * @devdesc The ES health check shows no status (in progress, + * fail or succeed) so assuming it was never initiated + * at the start of the IPL. + * @custdesc NVDIMM ES health check failed. + */ + l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE, + NVDIMM_ES_HEALTH_CHECK, + NVDIMM_ES_HEALTH_CHECK_NEVER_INITIATED, + get_huid(l_nvdimm), + l_esHealthCheck, + ErrlEntry::NO_SW_CALLOUT ); + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvdimm, l_err); - break; + // Add a BPM callout + l_err->addPartCallout( l_nvdimm, + HWAS::BPM_PART_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + nvdimmAddPage4Regs(l_nvdimm,l_err); + // Collect the error + errlCommit(l_err, NVDIMM_COMP_ID); + + // Let the caller know something went amiss + l_didEsHealthCheckPass = false; } + } // end for (auto const l_nvdimm : i_nvdimmTargetList) - TRACFCOMP( g_trac_nvdimm, - "notifyNvdimmProtectionChange: 0x%.8X processor NVDIMMS are " - "%s protected (current armed_state: 0x%02X)", - get_huid(i_target), - (l_nvdimm_protection_state == hostInterfaces::HBRT_FW_NVDIMM_PROTECTED)?"now":"NOT", - l_nvdimm_armed_state ); - - // Create the firmware_request request struct to send data - hostInterfaces::hbrt_fw_msg l_req_fw_msg; - memset(&l_req_fw_msg, 0, sizeof(l_req_fw_msg)); // clear it all - - // actual msg size (one type of hbrt_fw_msg) - uint64_t l_req_fw_msg_size = hostInterfaces::HBRT_FW_MSG_BASE_SIZE + - sizeof(l_req_fw_msg.nvdimm_protection_state); - - // Populate the firmware_request request struct with given data - 
l_req_fw_msg.io_type = - hostInterfaces::HBRT_FW_MSG_TYPE_NVDIMM_PROTECTION; - l_req_fw_msg.nvdimm_protection_state.i_procId = l_chipId; - l_req_fw_msg.nvdimm_protection_state.i_state = - l_nvdimm_protection_state; - - // Create the firmware_request response struct to receive data - hostInterfaces::hbrt_fw_msg l_resp_fw_msg; - uint64_t l_resp_fw_msg_size = sizeof(l_resp_fw_msg); - memset(&l_resp_fw_msg, 0, l_resp_fw_msg_size); - - // Make the firmware_request call - l_err = firmware_request_helper(l_req_fw_msg_size, - &l_req_fw_msg, - &l_resp_fw_msg_size, - &l_resp_fw_msg); - - } while (0); - - TRACFCOMP( g_trac_nvdimm, - EXIT_MRK "notifyNvdimmProtectionChange(%.8X, %d) - ERRL %.8X:%.4X", - get_huid(i_target), i_state, - ERRL_GETEID_SAFE(l_err), ERRL_GETRC_SAFE(l_err) ); + // Should not have any uncommitted errors + assert(l_err == NULL, "nvDimmEsCheckHealthStatus() - unexpected " + "uncommitted error found" ); - return l_err; -} + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmEsCheckHealthStatus(): " + "Returning %s", l_didEsHealthCheckPass == true ? "true" : "false"); + + return l_didEsHealthCheckPass; +} // end nvDimmEsCheckHealthStatus /** - * @brief This function polls the command status register for arm completion - * (does not indicate success or fail) + * @brief A wrapper around the call to nvDimmEsCheckHealthStatus * - * @param[in] i_nvdimm - nvdimm target with NV controller + * @see nvDimmEsCheckHealthStatus for more details * - * @param[out] o_poll - total polled time in ms - * - * @return errlHndl_t - Null if successful, otherwise a pointer to - * the error log. 
+ * @return false if one or more NVDIMMs fail an ES health check, else true */ -errlHndl_t nvdimmPollArmDone(TARGETING::Target* i_nvdimm, - uint32_t &o_poll) +bool nvDimmEsCheckHealthStatusOnSystem() { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmPollArmDone() nvdimm[%X]", TARGETING::get_huid(i_nvdimm) ); + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmEsCheckHealthStatusOnSystem()"); - errlHndl_t l_err = nullptr; + // Get the list of NVDIMM Targets from the system + TargetHandleList l_nvDimmTargetList; + nvdimm_getNvdimmList(l_nvDimmTargetList); - l_err = nvdimmPollStatus ( i_nvdimm, ARM, o_poll); + // Return status of doing a check health status + bool l_didEsHealthCheckPass = nvDimmEsCheckHealthStatus(l_nvDimmTargetList); - TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollArmDone() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmEsCheckHealthStatusOnSystem(): " + "Returning %s", l_didEsHealthCheckPass == true ? "true" : "false" ); - return l_err; -} + return l_didEsHealthCheckPass; +} // end nvDimmCheckHealthStatusOnSystem -/** - * @brief This function checks the arm status register to make sure - * the trigger has been armed to ddr_reset_n +/* + * @brief Check the bad flash block percentage against a given maximum allowed. * - * @param[in] i_nvdimm - nvdimm target with NV controller + * @details This returns a tristate - 1 pass, 2 different fails + * If true is returned, then the check passed and + * o_badFlashBlockPercentage will contain what the retrieved + * flash block percentage is. + * If false is returned and the o_badFlashBlockPercentage is zero, then + * the check failed because of a register read fail + * If false is returned and the o_badFlashBlockPercentage is not zero, + * then the check failed because the retrieved bad flash block + * percentage exceeds the given maximum allowed * - * @return errlHndl_t - Null if successful, otherwise a pointer to - * the error log. 
+ * @param[in] i_nvDimm - The NVDIMM to check + * @param[in] i_maxPercentageAllowed - The maximum percentage of bad flash + * block allowed + * @param[out] o_badFlashBlockPercentage - The retrieved bad flash block + * percentage from i_nvDimm, if no + * register read error. + * + * @return false if check failed or register read failed, else true */ -errlHndl_t nvdimmCheckArmSuccess(TARGETING::Target *i_nvdimm) +bool nvDimmCheckBadFlashBlockPercentage(TargetHandle_t i_nvDimm, + const uint8_t i_maxPercentageAllowed, + uint8_t &o_badFlashBlockPercentage) { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmCheckArmSuccess() nvdimm[%X]", - TARGETING::get_huid(i_nvdimm)); + // Cache the HUID of the NVDIMM + uint32_t l_nvDimmHuid = get_huid( i_nvDimm ); - errlHndl_t l_err = nullptr; - uint8_t l_data = 0; + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmCheckBadFlashBlockPercentage(): " + "NVDIMM(0x%.4X), max bad flash blocks allowed(%d)", + l_nvDimmHuid, + i_maxPercentageAllowed); + + // The status of the check on the bad block percentage + bool l_didBadFlashBlockPercentageCheckPass(true); + + // The retrieved flash block percentage from register, initialize to zero + o_badFlashBlockPercentage = 0; + + // Handle to catch any errors + errlHndl_t l_err(nullptr); + + // Retrieve the percentage of bad blocks and validate + TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckBadFlashBlockPercentage(): " + "Reading NVDIMM(0x%.8X) percentage of bad blocks from " + "register FLASH_BAD_BLK_PCT(0x%.4X)", + l_nvDimmHuid, FLASH_BAD_BLK_PCT); - l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data); + l_err = nvdimmReadReg(i_nvDimm, + FLASH_BAD_BLK_PCT, + o_badFlashBlockPercentage); if (l_err) { - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X]" - "failed to read arm status reg!",TARGETING::get_huid(i_nvdimm)); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckBadFlashBlockPercentage(): " + "FAIL: NVDIMM(0x%.8X) failed to read the percentage of " + "bad blocks from register 
FLASH_BAD_BLK_PCT(0x%.4X), " + "marking as a fail", + l_nvDimmHuid, FLASH_BAD_BLK_PCT); + + l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit(l_err, NVDIMM_COMP_ID); + + // Set up the fail state, so caller can determine that the fail was + // due to a register read error + l_didBadFlashBlockPercentageCheckPass = false; + o_badFlashBlockPercentage = 0; } - else if ((l_data & ARM_SUCCESS) != ARM_SUCCESS) + else { + // Trace out the returned data for inspection + TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckBadFlashBlockPercentage(): " + "NVDIMM(0x%.8X) returned value (%d) from the " + "percentage of bad blocks, register " + "FLASH_BAD_BLK_PCT(0x%.4X)", + l_nvDimmHuid, + o_badFlashBlockPercentage, + FLASH_BAD_BLK_PCT); - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X]" - "failed to arm!",TARGETING::get_huid(i_nvdimm)); - /*@ - *@errortype - *@reasoncode NVDIMM_ARM_FAILED - *@severity ERRORLOG_SEV_PREDICTIVE - *@moduleid NVDIMM_SET_ARM - *@userdata1[0:31] Related ops (0xff = NA) - *@userdata1[32:63] Target Huid - *@userdata2 <UNUSED> - *@devdesc Encountered error arming the catastrophic save - * trigger on NVDIMM. Make sure an energy source - * is connected to the NVDIMM and the ES policy - * is set properly - *@custdesc NVDIMM encountered error arming save trigger - */ - l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, - NVDIMM_SET_ARM, - NVDIMM_ARM_FAILED, - TWO_UINT32_TO_UINT64(ARM, TARGETING::get_huid(i_nvdimm)), - 0x0, - ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); - - l_err->collectTrace(NVDIMM_COMP_NAME, 256 ); - - // Failure to arm could mean internal NV controller error or - // even error on the battery pack. 
NVDIMM will lose persistency - // if failed to arm trigger - l_err->addPartCallout( i_nvdimm, - HWAS::NV_CONTROLLER_PART_TYPE, - HWAS::SRCI_PRIORITY_HIGH); - l_err->addPartCallout( i_nvdimm, - HWAS::BPM_PART_TYPE, - HWAS::SRCI_PRIORITY_MED); - l_err->addPartCallout( i_nvdimm, - HWAS::BPM_CABLE_PART_TYPE, - HWAS::SRCI_PRIORITY_MED); - } + // Check to see if the bad flash block percentage + // exceeds maximum allowed. + if (o_badFlashBlockPercentage > i_maxPercentageAllowed) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckBadFlashBlockPercentage(): " + "FAIL: For NVDIMM (0x%.8X), the percentage of bad " + "flash blocks (%d), read from register " + "FLASH_BAD_BLK_PCT(0x%.4X), exceeds the maximum " + "percentage of bad flash blocks allowed (%d), marking " + "this as a fail", + l_nvDimmHuid, + o_badFlashBlockPercentage, + FLASH_BAD_BLK_PCT, + i_maxPercentageAllowed); - TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmCheckArmSuccess() nvdimm[%X] ret[%X]", - TARGETING::get_huid(i_nvdimm), l_data); + // Set up the fail state, so caller can determine that the fail was + // due to percentage exceeding the max percentage allowed. + // Note: Leave the value in o_badFlashBlockPercentage so caller + // can inspect, if they wish + l_didBadFlashBlockPercentageCheckPass = false; + } + else + { + TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckBadFlashBlockPercentage(): " + "SUCCESS: For NVDIMM (0x%.8X), the percentage of bad " + "flash blocks (%d) is less than or meets the maximum " + "percentage of bad flash blocks allowed (%d), " + "marking this as a pass", + l_nvDimmHuid, + o_badFlashBlockPercentage, + i_maxPercentageAllowed); - return l_err; + // Set up the pass state + // Note: Leave the value in o_badFlashBlockPercentage so caller + // can inspect, if they wish + l_didBadFlashBlockPercentageCheckPass = true; + } // end if (l_badFlashBlockPercentage > i_maxPercentageAllowed) + } // end if (l_err) ... 
else + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmCheckBadFlashBlockPercentage(): " + "Returning %s", + l_didBadFlashBlockPercentageCheckPass == true ? "true" : "false" ); + + return l_didBadFlashBlockPercentageCheckPass; } -bool nvdimmArm(TARGETING::TargetHandleList &i_nvdimmTargetList) +/* + * @brief Check the flash error count against a given maximum allowed. + * + * @details This returns a tristate - 1 pass, 2 different fails + * If true is returned, then the check passed and + * o_readFlashErrorCount will contain what the retrieved + * flash error count is. + * If false is returned and the o_readFlashErrorCount is zero, then + * the check failed because of a register read fail + * If false is returned and the o_readFlashErrorCount is not zero, + * then the check failed because the retrieved flash error + * count exceeds the given maximum allowed + * + * @param[in] i_nvDimm - The NVDIMM to check + * @param[in] i_maxFlashErrorsAllowed - The maximum number of flash errors + * allowed + * @param[out] o_readFlashErrorCount - The retrieved bad flash error + * count from i_nvDimm, if no + * register read error. 
+ * + * @return false if check failed or register read failed, else true + */ +bool nvDimmCheckFlashErrorCount(TargetHandle_t i_nvDimm, + const uint32_t i_maxFlashErrorsAllowed, + uint32_t &o_readFlashErrorCount) { - bool o_arm_successful = true; + // Cache the HUID of the NVDIMM + uint32_t l_nvDimmHuid = get_huid( i_nvDimm ); - TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmArm() %d", - i_nvdimmTargetList.size()); + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmCheckFlashErrorCount(): " + "NVDIMM(0x%.4X), max flash errors allowed(%d)", + l_nvDimmHuid, + i_maxFlashErrorsAllowed); - errlHndl_t l_err = nullptr; + // The status of the check on the flash error count + bool l_didFlashErrorCountCheckPass(true); - for (auto const l_nvdimm : i_nvdimmTargetList) + // The retrieved flash error count from register, initialize to zero + o_readFlashErrorCount = 0; + + // Handle to catch any errors + errlHndl_t l_err(nullptr); + + // The retrieved flash error count from a register + uint8_t l_readFlashErrorCountByte(0); + + // Read the flash error count registers starting from MSB to LSB + for (int16_t l_flashErrorRegister = FLASH_ERROR_COUNT2; + l_flashErrorRegister >= FLASH_ERROR_COUNT0; + --l_flashErrorRegister) { - // skip if the nvdimm is in error state - if (NVDIMM::nvdimmInErrorState(l_nvdimm)) - { - // error state means arming not successful - o_arm_successful = false; - continue; - } + // Reset this for every iteration, may be redundant + l_readFlashErrorCountByte = 0; + + TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): " + "Reading NVDIMM(0x%.8X) flash error count from " + "register FLASH_ERROR_COUNT(0x%.4X)", + l_nvDimmHuid, l_flashErrorRegister); + + l_err = nvdimmReadReg(i_nvDimm, + static_cast<i2cReg >(l_flashErrorRegister), + l_readFlashErrorCountByte); - l_err = nvdimmSetESPolicy(l_nvdimm); if (l_err) { - o_arm_successful = false; - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOBKUP); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckFlashErrorCount(): " + "FAIL: 
NVDIMM(0x%.8X) failed to read flash error " + "count from register FLASH_ERROR_COUNT(0x%.4X) " + "marking as a fail", + l_nvDimmHuid, l_flashErrorRegister); - // Committing the error as we don't want this to interrupt - // the boot. This will notify the user that action is needed - // on this module l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); - l_err->collectTrace(NVDIMM_COMP_NAME, 1024); - errlCommit( l_err, NVDIMM_COMP_ID ); - continue; + l_err->collectTrace(NVDIMM_COMP_NAME); + errlCommit(l_err, NVDIMM_COMP_ID); + + // Set up the fail state, so caller can determine that the fail was + // due to a register read error + l_didFlashErrorCountCheckPass = false; + o_readFlashErrorCount = 0; + + break; } - l_err = NVDIMM::nvdimmChangeArmState(l_nvdimm, ARM_TRIGGER); - // If we run into any error here we will just - // commit the error log and move on. Let the - // system continue to boot and let the user - // salvage the data - if (l_err) + // If we get here, then the read was successful + // Append the read flash error count byte to the LSB of the + // aggregated flash error count bytes. + o_readFlashErrorCount = (o_readFlashErrorCount << 8) | + l_readFlashErrorCountByte; + + TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): " + "NVDIMM(0x%.8X) returned value (0x%.2X) from the " + "partial flash error count, register " + "FLASH_ERROR_COUNT(0x%.4X)", + l_nvDimmHuid, + l_readFlashErrorCountByte, + l_flashErrorRegister); + + } // end for (int16_t l_flashErrorRegister = FLASH_ERROR_COUNT2; ... 
+ + // If o_readFlashErrorCount is not zero, then register read was successful + if (o_readFlashErrorCount) + { + TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): " + "NVDIMM(0x%.8X) flash error count = %d ", + l_nvDimmHuid, o_readFlashErrorCount); + + // Check the validity of the flash error count + if (o_readFlashErrorCount > i_maxFlashErrorsAllowed) { - NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP); - // Committing the error as we don't want this to interrupt - // the boot. This will notify the user that action is needed - // on this module - l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); - l_err->collectTrace(NVDIMM_COMP_NAME, 1024); - errlCommit( l_err, NVDIMM_COMP_ID ); - o_arm_successful = false; - continue; + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckFlashErrorCount(): " + "FAIL: For NVDIMM (0x%.8X), the flash error count (%d), " + "read from registers FLASH_ERROR_COUNT0(0x%.4X), " + "FLASH_ERROR_COUNT1(0x%.4X) and FLASH_ERROR_COUNT2(0x%.4X), " + "exceeds the maximum number of flash " + "errors allowed (%d), marking this as a fail", + l_nvDimmHuid, + o_readFlashErrorCount, + FLASH_ERROR_COUNT0, + FLASH_ERROR_COUNT1, + FLASH_ERROR_COUNT2, + i_maxFlashErrorsAllowed); + + // Set up the fail state, so caller can determine that the fail was + // due to error count exceeding the max errors allowed. + // Note: Leave the value in o_readFlashErrorCount so caller + // can inspect, if they wish + l_didFlashErrorCountCheckPass = false; } + else + { + TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): " + "SUCCESS: For NVDIMM(0x%.8X), the flash error counts " + "(%d) is less than or meets the maximum number of " + "errors allowed (%d), marking this as a pass", + l_nvDimmHuid, + o_readFlashErrorCount, + i_maxFlashErrorsAllowed); - // Arm happens one module at a time. 
No need to set any offset on the counter - uint32_t l_poll = 0; - l_err = nvdimmPollArmDone(l_nvdimm, l_poll); - if (l_err) + // Set up the pass state + // Note: Leave the value in o_readFlashErrorCount so caller + // can inspect, if they wish + l_didFlashErrorCountCheckPass = true; + } + } // end if (o_readFlashErrorCount) + + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmCheckFlashErrorCount(): " + "Returning %s", + l_didFlashErrorCountCheckPass == true ? "true" : "false" ); + + return l_didFlashErrorCountCheckPass; +} + +/* + * @brief Check the NVM (non-volatile memory)/flash health of the individual + * NVDIMMs supplied in list. + * + * @param[in] i_nvdimmTargetList - list of NVDIMMs to check the health of flash + * + * @return false if one or more NVDIMMs fail NVM health check, else true + */ +bool nvDimmNvmCheckHealthStatus(const TargetHandleList &i_nvDimmTargetList) +{ + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmNvmCheckHealthStatus(): " + "Target list size(%d)", i_nvDimmTargetList.size()); + + // The following maximums are the same values used by SMART's + // manufacturing and recommended that we use. + // The maximum percentage of bad flash blocks + // Fail if over 19% of bad flash blocks is encountered + const uint8_t MAXIMUM_PERCENTAGE_OF_BAD_FLASH_BLOCKS_ALLOWED = 19; + // The maximum number of flash memory errors allowed + // Fail if over 300 flash memory errors is encountered + const uint32_t MAXIMUM_NUMBER_OF_FLASH_MEMORY_ERRORS_ALLOWED = 300; + + // Status of the accumulation of all calls related to the NVM health check. 
+ // If any one call is bad/fails, then this will be false, else it stays true + bool l_didNvmHealthCheckPass(true); + + // Handle to catch any errors + errlHndl_t l_err(nullptr); + + // The retrieved flash block percentage from register + uint8_t l_badFlashBlockPercentage(0); + // The retrieved flash error count from register + uint32_t l_flashErrorCount(0); + + // The status of the checks on the percentage of bad blocks and + // flash error count + // Default to true + bool l_badFlashBlockPercentageCheckPassed(true); + bool l_flashErrorCountCheckPassed(true); + + // Iterate thru the supplied NVDIMMs checking the health of the NVM + for (auto const l_nvDimm : i_nvDimmTargetList) + { + // Cache the HUID of the NVDIMM + uint32_t l_nvDimmHuid = get_huid( l_nvDimm ); + + // Reset these for every NVDIMM that is checked + l_badFlashBlockPercentage = 0; + l_flashErrorCount = 0; + l_badFlashBlockPercentageCheckPassed = true; + l_flashErrorCountCheckPassed = true; + + // Check the validity of bad flash block percentage + if (!nvDimmCheckBadFlashBlockPercentage( + l_nvDimm, + MAXIMUM_PERCENTAGE_OF_BAD_FLASH_BLOCKS_ALLOWED, + l_badFlashBlockPercentage)) { - NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP); - // Committing the error as we don't want this to interrupt - // the boot. This will notify the user that action is needed - // on this module - l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); - l_err->collectTrace(NVDIMM_COMP_NAME, 1024); - errlCommit( l_err, NVDIMM_COMP_ID ); - o_arm_successful = false; - continue; + // Set this to false to indicate that the overall check on the + // NVDIMMs had at least one failure + l_didNvmHealthCheckPass = false; + + // If no data in the variable l_badFlashBlockPercentage, then + // this is a read register fail. 
Move onto the next NVDIMM + // this is a dud + if (!l_badFlashBlockPercentage) + { + continue; + } + + // Set the check to false, to facilitate error reporting + l_badFlashBlockPercentageCheckPassed = false; } - l_err = nvdimmCheckArmSuccess(l_nvdimm); - if (l_err) + // Check the validity of the flash error count + if (!nvDimmCheckFlashErrorCount( + l_nvDimm, + MAXIMUM_NUMBER_OF_FLASH_MEMORY_ERRORS_ALLOWED, + l_flashErrorCount)) { - NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP); - // Committing the error as we don't want this to interrupt - // the boot. This will notify the user that action is needed - // on this module - l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); - l_err->collectTrace(NVDIMM_COMP_NAME, 1024); - errlCommit( l_err, NVDIMM_COMP_ID ); - o_arm_successful = false; - continue; + // Set this to false to indicate that the overall check on the + // NVDIMMs had at least one failure + l_didNvmHealthCheckPass = false; + + // If no data in the variable l_flashErrorCount, then + // this is a read register fail. Move onto the next NVDIMM + // this is a dud + if (!l_flashErrorCount) + { + continue; + } + + // Set the check to false, to facilitate error reporting + l_flashErrorCountCheckPassed = false; } - // After arming the trigger, erase the image to prevent the possible - // stale image getting the restored on the next boot in case of failed - // save. - l_err = nvdimmEraseNF(l_nvdimm); - if (l_err) + /// Now we assess the health of the flash based on data gathered above + if ( !l_badFlashBlockPercentageCheckPassed || + !l_flashErrorCountCheckPassed ) { - NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP); - // Committing the error as we don't want this to interrupt - // the boot. 
This will notify the user that action is needed - // on this module - l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); - l_err->collectTrace(NVDIMM_COMP_NAME, 1024); - errlCommit( l_err, NVDIMM_COMP_ID ); - o_arm_successful = false; + // First set the NVDIMM HUID to the first 32 bits of user data 1 + uint64_t l_badFlashBlockPercentageUserData1 = + TWO_UINT32_TO_UINT64(l_nvDimmHuid, 0); - // If the erase failed let's disarm the trigger - l_err = nvdimmChangeArmState(l_nvdimm, DISARM_TRIGGER); - if (l_err) + // If an issue with the bad flash block percentage, then append + // data to user data 1 + if (!l_badFlashBlockPercentageCheckPassed && + l_badFlashBlockPercentage) { - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArm() nvdimm[%X], error disarming the nvdimm!", - TARGETING::get_huid(l_nvdimm)); - l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE); - l_err->collectTrace(NVDIMM_COMP_NAME, 1024); - errlCommit(l_err, NVDIMM_COMP_ID); + // Setting the HUID here is redundant but easier than trying to + // do some clever code that will set the HUID for user data 1 + // when this path is not taken, but the next check on the flash + // error count is taken + l_badFlashBlockPercentageUserData1 = + TWO_UINT32_TO_UINT64(l_nvDimmHuid, + TWO_UINT16_TO_UINT32( + l_badFlashBlockPercentage, + MAXIMUM_PERCENTAGE_OF_BAD_FLASH_BLOCKS_ALLOWED)); } - continue; + // If an issue with the flash error count, then set user + // data 2 to contain the flash error count value + uint64_t l_flashErrorCountUserData2(0); + if (!l_flashErrorCountCheckPassed && + l_flashErrorCount) + { + l_flashErrorCountUserData2 = + TWO_UINT32_TO_UINT64(l_flashErrorCount, + MAXIMUM_NUMBER_OF_FLASH_MEMORY_ERRORS_ALLOWED); + } + + /*@ + * @errortype + * @severity ERRL_SEV_PREDICTIVE + * @moduleid NVDIMM_NVM_HEALTH_CHECK + * @reasoncode NVDIMM_NVM_HEALTH_CHECK_FAILED + * @userdata1[0:31] HUID of NVDIMM target + * @userdata1[32:47] The retrieved bad flash block percentage, + * if error with, else 0 + * @userdata1[48:63] The maximum 
percentage of bad flash blocks + * allowed, if bad flash block percentage + * exceeds this maximum, else 0 + * @userdata2[0:31] The retrieved flash error count, + * if error with, else 0 + * @userdata2[32:63] The maximum number of flash errors + * allowed, if flash error exceeds this + * maximum, else 0 + * @devdesc Either the NVDIMM NVM bad flash block + * percentage exceeded the maximum percentage + * allowed or the NVDIMM NVM number of flash + * error exceeds the maximum count allowed + * or both. + * @custdesc NVDIMM NVM health check failed. + */ + l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE, + NVDIMM_NVM_HEALTH_CHECK, + NVDIMM_NVM_HEALTH_CHECK_FAILED, + l_badFlashBlockPercentageUserData1, + l_flashErrorCountUserData2, + ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace(NVDIMM_COMP_NAME); + nvdimmAddVendorLog(l_nvDimm, l_err); + + // Add a DIMM callout + l_err->addHwCallout( l_nvDimm, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL ); + + // Collect the error + errlCommit(l_err, NVDIMM_COMP_ID); + + // Let the caller know something went amiss + l_didNvmHealthCheckPass = false; } - } + else + { + // This NVDIMM passed the NVM health check + TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmNvmCheckHealthStatus(): " + "Success: NVDIMM (0x%.8X) passed the NVM health check.", + l_nvDimmHuid); + } // end if ( !l_badFlashBlockPercentageCheckPassed .. else + } // end for (auto const l_nvdimm : i_nvdimmTargetList) - TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmArm() returning %d", - o_arm_successful); - return o_arm_successful; -} + // Should not have any uncommitted errors + assert(l_err == NULL, "nvDimmNvmCheckHealthStatus() - unexpected " + "uncommitted error found"); + + TRACFCOMP(g_trac_nvdimm,EXIT_MRK"nvDimmNvmCheckHealthStatus(): Returning %s", + l_didNvmHealthCheckPass == true ? 
"true" : "false" ); + + return l_didNvmHealthCheckPass; +} // end nvDimmNvmCheckHealthStatus /** - * @brief Check nvdimm error state + * @brief A wrapper around the call to nvDimmNvmCheckHealthStatus * - * @param[in] i_nvdimm - nvdimm target + * @see nvDimmNvmCheckHealthStatus for more details * - * @return bool - true if nvdimm is in any error state, false otherwise + * @return false if one or more NVDIMMs fail an NVM health check, else true */ -bool nvdimmInErrorState(TARGETING::Target *i_nvdimm) +bool nvDimmNvmCheckHealthStatusOnSystem() { - TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmInErrorState() HUID[%X]",TARGETING::get_huid(i_nvdimm)); + TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmNvmCheckHealthStatusOnSystem()"); - uint8_t l_statusFlag = i_nvdimm->getAttr<TARGETING::ATTR_NV_STATUS_FLAG>(); - bool l_ret = true; + // Get the list of NVDIMM Targets from the system + TargetHandleList l_nvDimmTargetList; + nvdimm_getNvdimmList(l_nvDimmTargetList); - if ((l_statusFlag & NSTD_ERR) == 0) - l_ret = false; + // Return status of doing a check health status + bool l_didNvmHealthCheckPass = nvDimmNvmCheckHealthStatus(l_nvDimmTargetList); - TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmInErrorState() HUID[%X]",TARGETING::get_huid(i_nvdimm)); - return l_ret; + TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmNvmCheckHealthStatusOnSystem(): " + "Returning %s", l_didNvmHealthCheckPass == true ? 
"true" : "false" ); + + return l_didNvmHealthCheckPass; +} // end nvDimmCheckHealthStatusOnSystem + + +/** + * @brief Send NV_STATUS to host + */ +void nvdimmSendNvStatus() +{ + // Send NV_STATUS for all nvdimms + TargetHandleList l_nvdimmTargetList; + nvdimm_getNvdimmList(l_nvdimmTargetList); + for (const auto & l_nvdimm : l_nvdimmTargetList) + { + errlHndl_t l_err = nullptr; + l_err = notifyNvdimmProtectionChange(l_nvdimm,SEND_NV_STATUS); + if (l_err) + { + errlCommit(l_err, NVDIMM_COMP_ID); + } + } } + +struct registerNvdimmRt +{ + registerNvdimmRt() + { + // Register function to call at end of RT init + postInitCalls_t * rt_post = getPostInitCalls(); + rt_post->callSendNvStatus = &nvdimmSendNvStatus; + } +}; + +registerNvdimmRt g_registerNvdimmRt; + } // end NVDIMM namespace |