diff options
author | Fadi Kassem <fmkassem@us.ibm.com> | 2015-05-11 14:00:36 -0500 |
---|---|---|
committer | Guillermo J. Silva <guilsilv@us.ibm.com> | 2015-05-12 13:57:31 -0500 |
commit | 524c25822287e0a0f95a58529c5c9a4881185f07 (patch) | |
tree | f0c4d5e865a524807a29031878254879da911b64 /src | |
parent | 1d036a6fcfc3cde742d58555c079fb9726f5ebd9 (diff) | |
download | talos-occ-524c25822287e0a0f95a58529c5c9a4881185f07.tar.gz talos-occ-524c25822287e0a0f95a58529c5c9a4881185f07.zip |
Fix issue where false errorlogs are reported.
Change-Id: Ifa5e6812001bf83de19b5b68d5b98a7acdc1dd06
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/17694
Tested-by: FSP CI Jenkins
Reviewed-by: Guillermo J. Silva <guilsilv@us.ibm.com>
Tested-by: Guillermo J. Silva <guilsilv@us.ibm.com>
Diffstat (limited to 'src')
-rwxr-xr-x | src/occ/cmdh/cmdh_fsp_cmds.c | 30 | ||||
-rwxr-xr-x | src/occ/errl/errl.c | 11 | ||||
-rwxr-xr-x | src/occ/thread/chom.c | 2 |
3 files changed, 40 insertions, 3 deletions
```diff
diff --git a/src/occ/cmdh/cmdh_fsp_cmds.c b/src/occ/cmdh/cmdh_fsp_cmds.c
index d15decb..bb1f12a 100755
--- a/src/occ/cmdh/cmdh_fsp_cmds.c
+++ b/src/occ/cmdh/cmdh_fsp_cmds.c
@@ -167,6 +167,19 @@ errlHndl_t cmdh_tmgt_poll (const cmdh_fsp_cmd_t * i_cmd_ptr,
     l_poll_rsp->errl_address = getErrlOCIAddrByID(l_poll_rsp->errl_id);
     l_poll_rsp->errl_length = getErrlLengthByID(l_poll_rsp->errl_id);
+    //If errl_id is not 0, then neither address or length should be zero.
+    //This should not happen, but if it does tmgt will create an error log that
+    //includes the data at the errl slot address given.
+    //NOTE: One cause for a false errlog id is corruption of data in one errl slot
+    //      due to writing data greater than the size of the previous slot. For
+    //      example writing the CallHome errorlog (3kb) into a regular sized (2kb) slot.
+    if ( (l_poll_rsp->errl_id != 0) &&
+         ((l_poll_rsp->errl_address == 0) || (l_poll_rsp->errl_length == 0)))
+    {
+        TRAC_ERR("An error ID has been sent via poll but the address or size is 0. "
+                 "ErrlId:0x%X, sz:0x%X, address:0x%X.",
+                 l_poll_rsp->errl_id, l_poll_rsp->errl_length, l_poll_rsp->errl_address);
+    }
     l_poll_rsp->data_length[0] = CONVERT_UINT16_UINT8_HIGH(CMDH_POLL_RESP_LEN_V0);
     l_poll_rsp->data_length[1] = CONVERT_UINT16_UINT8_LOW(CMDH_POLL_RESP_LEN_V0);
     l_rc = ERRL_RC_SUCCESS;
@@ -248,6 +261,22 @@ ERRL_RC cmdh_poll_v10(cmdh_fsp_rsp_t * o_rsp_ptr)
     l_poll_rsp->errl_address = getErrlOCIAddrByID(l_poll_rsp->errl_id);
     // Byte 13 - 14:
     l_poll_rsp->errl_length = getErrlLengthByID(l_poll_rsp->errl_id);
+
+    //If errl_id is not 0, then neither address or length should be zero.
+    //This should not happen, but if it does tmgt will create an error log that
+    //includes the data at the errl slot address given that can be used for debug.
+    //NOTE: One cause for a false errlog id is corruption of data in one errl slot
+    //      due to writing data greater than the size of the previous slot. For
+    //      example writing the CallHome errorlog (3kb) into a regular sized (2kb) slot.
+    //      Make sure to verify the order of the memory allocation for the errl slots.
+    if ( (l_poll_rsp->errl_id != 0) &&
+         ((l_poll_rsp->errl_address == 0) || (l_poll_rsp->errl_length == 0)))
+    {
+        TRAC_ERR("An error ID has been sent via poll but the address or size is 0. "
+                 "ErrlId:0x%X, sz:0x%X, address:0x%X.",
+                 l_poll_rsp->errl_id, l_poll_rsp->errl_length, l_poll_rsp->errl_address);
+    }
+
     // Byte 15 - 16: reserved.
     // Byte 17 - 32 (16 bytes): OCC level
     memcpy( (void *) l_poll_rsp->occ_level, (void *) &G_occ_buildname[0], 16);
@@ -260,6 +289,7 @@ ERRL_RC cmdh_poll_v10(cmdh_fsp_rsp_t * o_rsp_ptr)
     // Byte 40:
     l_poll_rsp->sensor_dblock_version = 0x01; //Currently only 0x01 is supported.
+    //l_rsp_index is used as an index into o_rsp_ptr
     uint16_t l_rsp_index = CMDH_POLL_RESP_LEN_V10;
     ////////////////////
diff --git a/src/occ/errl/errl.c b/src/occ/errl/errl.c
index ca0a279..cd75621 100755
--- a/src/occ/errl/errl.c
+++ b/src/occ/errl/errl.c
@@ -79,6 +79,8 @@ uint8_t getErrSlotNumAndErrId(
     uint8_t l_rc = ERRL_INVALID_SLOT;
     uint32_t l_mask = ERRL_SLOT_MASK_DEFAULT;
+    //Use severity to determine what slots are available for the given
+    //type of errorlog severity.
     switch ( i_severity )
     {
         case ERRL_SEV_INFORMATIONAL:
@@ -100,16 +102,18 @@ uint8_t getErrSlotNumAndErrId(
     if ( l_mask != ERRL_SLOT_MASK_DEFAULT )
     {
         // 1. Find an available slot
+        // l_slotBitWord represents the available slots given the severity type.
         uint8_t l_slot = ERRL_INVALID_SLOT;
         uint32_t l_slotBitWord = ~(G_occErrSlotBits | l_mask);
         SsxMachineContext l_ctx;
-        // 2. use assembly cntlzw to get slot & (disable/enable interrupts)
+        // 2. use assembly cntlzw (count leading zeros) to get available slot based on
+        // severity type, and (disable/enable interrupts)
         ssx_critical_section_enter(SSX_NONCRITICAL, &l_ctx);
         __asm__ __volatile__ ( "cntlzw %0, %1;" : "=r" (l_slot) : "r" (l_slotBitWord));
         ssx_critical_section_exit(&l_ctx);
-        // slot is valid
+        // A slot is available and valid
         if ( l_slot < ERRL_MAX_SLOTS )
         {
             ssx_critical_section_enter(SSX_NONCRITICAL, &l_ctx);
@@ -118,8 +122,11 @@ uint8_t getErrSlotNumAndErrId(
             *o_timeStamp = ssx_timebase_get();
             // save of counter and then increment it
             // Note: Internal caller so assuming valid pointer
+
+            //Provide next ErrorId; G_occErrIdCounter should never be 0.
             *o_errlId = ((++G_occErrIdCounter) == 0) ? ++G_occErrIdCounter : G_occErrIdCounter;
+            //Set slot bit in list of used up slots.
             G_occErrSlotBits |= (ERRL_SLOT_SHIFT >> l_slot);
             ssx_critical_section_exit(&l_ctx);
diff --git a/src/occ/thread/chom.c b/src/occ/thread/chom.c
index 9c8f85e..b75bda8 100755
--- a/src/occ/thread/chom.c
+++ b/src/occ/thread/chom.c
@@ -380,7 +380,7 @@ void chom_gen_periodic_log()
                    THRD_MID_GEN_CALLHOME_LOG, // modId
                    GEN_CALLHOME_LOG, // reasoncode
                    OCC_NO_EXTENDED_RC, // Extended reason code
-                   ERRL_SEV_INFORMATIONAL, // Severity
+                   ERRL_SEV_CALLHOME_DATA, // Severity; will be changed to informational in errl.c
                    NULL, // Trace Buf
                    0, // Trace Size
                    0, // userdata1
```