summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormbroyles <mbroyles@us.ibm.com>2018-01-10 16:18:59 -0600
committerMartha Broyles <mbroyles@us.ibm.com>2018-01-15 13:42:20 -0500
commit2397cb606cda005ae0fdd8455a827450fb4d8b4f (patch)
treefc19b43040853cc2878a4f0003bc2458128b4781
parentd868b77dfc6afd90d4fea874ae4a1175fffffaed (diff)
downloadtalos-occ-2397cb606cda005ae0fdd8455a827450fb4d8b4f.tar.gz
talos-occ-2397cb606cda005ae0fdd8455a827450fb4d8b4f.zip
Handle PGPE timeouts as workaround for prolonged droop events
Add "CLIP" information to poll response. Fix incorrectly throttling due to power when all cores are in stop 2 or greater. Change-Id: I502cc65ad8c4cffd7f9a1442fd4de185f3cac6e2 RTC: 183700 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/51741 Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com> Reviewed-by: Andres A. Lugo-Reyes <aalugore@us.ibm.com> Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
-rw-r--r--src/include/registers/ocb_firmware_registers.h6
-rw-r--r--src/occ_405/amec/amec_controller.c29
-rwxr-xr-xsrc/occ_405/amec/amec_freq.c56
-rwxr-xr-xsrc/occ_405/amec/amec_master_smh.c202
-rwxr-xr-xsrc/occ_405/amec/amec_sys.h4
-rwxr-xr-xsrc/occ_405/cmdh/cmdh_fsp_cmds.c22
-rwxr-xr-xsrc/occ_405/cmdh/cmdh_fsp_cmds.h1
-rwxr-xr-xsrc/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c31
-rwxr-xr-xsrc/occ_405/cmdh/cmdh_snapshot.c8
-rwxr-xr-xsrc/occ_405/common.c28
-rw-r--r--src/occ_405/common.h3
-rwxr-xr-xsrc/occ_405/errl/errl.h4
-rw-r--r--src/occ_405/occ_service_codes.h2
-rwxr-xr-xsrc/occ_405/occbuildname.c2
-rw-r--r--src/occ_405/pgpe/pgpe_interface.c268
-rwxr-xr-xsrc/occ_405/proc/proc_data_control.c128
-rwxr-xr-xsrc/occ_405/state.c206
-rw-r--r--src/occ_405/wof/wof.c76
18 files changed, 682 insertions, 394 deletions
diff --git a/src/include/registers/ocb_firmware_registers.h b/src/include/registers/ocb_firmware_registers.h
index 010ad02..82e94d5 100644
--- a/src/include/registers/ocb_firmware_registers.h
+++ b/src/include/registers/ocb_firmware_registers.h
@@ -1414,7 +1414,8 @@ typedef union ocb_occflg {
uint32_t gpu0_reset_status : 1;
uint32_t gpu1_reset_status : 1;
uint32_t gpu2_reset_status : 1;
- uint32_t reserved_occ : 3;
+ uint32_t reserved_occ : 2;
+ uint32_t pm_reset_suppress : 1;
uint32_t wof_hcode_mode : 2;
uint32_t active_quad_update : 1;
uint32_t request_occ_safe : 1;
@@ -1422,7 +1423,8 @@ typedef union ocb_occflg {
uint32_t request_occ_safe : 1;
uint32_t active_quad_update : 1;
uint32_t wof_hcode_mode : 2;
- uint32_t reserved_occ : 3;
+ uint32_t pm_reset_suppress : 1;
+ uint32_t reserved_occ : 2;
uint32_t gpu2_reset_status : 1;
uint32_t gpu1_reset_status : 1;
uint32_t gpu0_reset_status : 1;
diff --git a/src/occ_405/amec/amec_controller.c b/src/occ_405/amec/amec_controller.c
index f310208..530f53f 100644
--- a/src/occ_405/amec/amec_controller.c
+++ b/src/occ_405/amec/amec_controller.c
@@ -457,16 +457,25 @@ uint16_t amec_controller_speed2freq (const uint16_t i_speed, const uint16_t i_fm
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/
- l_temp16 = i_fmax;
- l_tempreg = (uint16_t)i_speed;
- l_temp32 = ((uint32_t)l_tempreg)*((uint32_t)l_temp16);
- l_temp16 = (uint16_t)1000;
- l_divide32[1] = (uint32_t)l_temp16;
- l_divide32[0] = (uint32_t)l_temp32;
- l_divide32[0] /= l_divide32[1];
- l_temp32 = l_divide32[0];
- l_freq = (uint16_t)l_temp32; /* freq will always fit in 16 bits */
-
+ // to handle max freq changing (i.e. mode change) between now and running amec_slv_proc_voting_box
+ // if speed is unconstrained set freq to unconstrained so voting box will use
+ // the most recent maximum frequency
+ if(i_speed >= g_amec->sys.max_speed)
+ {
+ l_freq = 0xFFFF;
+ }
+ else
+ {
+ l_temp16 = i_fmax;
+ l_tempreg = (uint16_t)i_speed;
+ l_temp32 = ((uint32_t)l_tempreg)*((uint32_t)l_temp16);
+ l_temp16 = (uint16_t)1000;
+ l_divide32[1] = (uint32_t)l_temp16;
+ l_divide32[0] = (uint32_t)l_temp32;
+ l_divide32[0] /= l_divide32[1];
+ l_temp32 = l_divide32[0];
+ l_freq = (uint16_t)l_temp32; /* freq will always fit in 16 bits */
+ }
return l_freq;
}
diff --git a/src/occ_405/amec/amec_freq.c b/src/occ_405/amec/amec_freq.c
index d1e8aad..51b04dc 100755
--- a/src/occ_405/amec/amec_freq.c
+++ b/src/occ_405/amec/amec_freq.c
@@ -282,6 +282,10 @@ void amec_slv_proc_voting_box(void)
uint16_t k = 0;
uint16_t l_chip_fmax = g_amec->sys.fmax;
uint16_t l_core_freq = 0;
+ uint16_t l_core_freq_max = 0; // max freq across all cores
+ uint16_t l_core_freq_min = g_amec->sys.fmax; // min freq across all cores
+ uint32_t l_current_reason = 0; // used for debug purposes
+ static uint32_t L_last_reason = 0; // used for debug purposes
uint32_t l_chip_reason = 0;
uint32_t l_core_reason = 0;
amec_proc_voting_reason_t l_kvm_throt_reason = NO_THROTTLE;
@@ -306,9 +310,6 @@ void amec_slv_proc_voting_box(void)
// This function implements the voting box to decide which input gets the right
// to actuate the system.
- //Reset the maximum core frequency requested prior to recalculation.
- g_amec->proc[0].core_max_freq = 0;
-
// PPB_FMAX
if(g_amec->proc[0].pwr_votes.ppb_fmax < l_chip_fmax)
{
@@ -516,6 +517,12 @@ void amec_slv_proc_voting_box(void)
//STORE core frequency and reason
g_amec->proc[0].core[k].f_request = l_core_freq;
g_amec->proc[0].core[k].f_reason = l_core_reason;
+ if(l_core_freq < l_core_freq_min)
+ {
+ // store the new lowest frequency and reason to be used after all cores checked
+ l_core_freq_min = l_core_freq;
+ l_current_reason = l_core_reason;
+ }
// Update the Amester parameter telling us the reason. Needed for
// parameter array.
@@ -557,9 +564,9 @@ void amec_slv_proc_voting_box(void)
}
#endif
- if(l_core_freq > g_amec->proc[0].core_max_freq)
+ if(l_core_freq > l_core_freq_max)
{
- g_amec->proc[0].core_max_freq = l_core_freq;
+ l_core_freq_max = l_core_freq;
}
} // if core present and not offline
else
@@ -570,6 +577,15 @@ void amec_slv_proc_voting_box(void)
}
}//End of for loop
+ // update max core frequency only if non-zero; 0 means all cores are offline (stop 2 or greater)
+ // this is used by the power capping algorithm; updating it to 0 would cause power throttling when not needed
+ if(l_core_freq_max)
+ {
+ g_amec->proc[0].core_max_freq = l_core_freq_max;
+ // update the overall reason driving frequency across all cores
+ g_amec->proc[0].f_reason = l_current_reason;
+ }
+
//check if there was a throttle reason change
if(l_kvm_throt_reason != G_amec_opal_proc_throt_reason)
{
@@ -582,6 +598,36 @@ void amec_slv_proc_voting_box(void)
ssx_semaphore_post(&G_dcomThreadWakeupSem);
}
}
+ // For debug... if lower than max update vars returned in poll response to give clipping reason
+ g_amec->proc[0].core_min_freq = l_core_freq_min;
+ if(l_core_freq_min < g_amec->sys.fmax)
+ {
+ if(l_current_reason == L_last_reason)
+ {
+ // same reason INC counter
+ if(g_amec->proc[0].current_clip_count != 0xFF)
+ {
+ g_amec->proc[0].current_clip_count++;
+ }
+ }
+ else
+ {
+ // new reason update history and set counter to 1
+ L_last_reason = l_current_reason;
+ g_amec->proc[0].current_clip_count = 1;
+ if( (g_amec->proc[0].chip_f_reason_history & l_current_reason) == 0)
+ {
+ g_amec->proc[0].chip_f_reason_history |= l_current_reason;
+ TRAC_IMP("First time throttling for reason[0x%08X] History[0x%08X] freq = %d",
+ l_current_reason, g_amec->proc[0].chip_f_reason_history, l_core_freq_min);
+ }
+ }
+ }
+ else // no active clipping
+ {
+ L_last_reason = 0;
+ g_amec->proc[0].current_clip_count = 0;
+ }
}
// Function Specification
diff --git a/src/occ_405/amec/amec_master_smh.c b/src/occ_405/amec/amec_master_smh.c
index 586a465..33d6c13 100755
--- a/src/occ_405/amec/amec_master_smh.c
+++ b/src/occ_405/amec/amec_master_smh.c
@@ -40,6 +40,7 @@
#include "dcom.h"
#include <amec_sensors_power.h>
#include <cmdh_fsp_cmds.h> // For G_apss_ch_to_function
+#include "common.h" // For ignore_pgpe_error()
//*************************************************************************/
// Externs
@@ -399,110 +400,121 @@ void amec_mst_check_under_pcap(void)
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/
-
- // Check if done everything possible to shed power and power still above a hard power cap
- // ppb_fmax = Fmin and PWRSYS > Node power cap and
- // Node power cap >= hard_min_pcap AND memory is throttled
- if((g_amec->proc[0].pwr_votes.ppb_fmax == g_amec->sys.fmin) &&
- (AMECSENSOR_PTR(PWRSYS)->sample > g_amec->pcap.active_node_pcap) &&
- (g_amec->pcap.active_node_pcap >= G_sysConfigData.pcap.hard_min_pcap) &&
- (g_amec->pcap.active_mem_level != 0) )
+ do
{
-
- G_over_cap_count++;
-
- // GPUs take longer for power limit to take effect if GPUs are present need to use
- // a longer wait time before logging an error and resetting
- if( ( (!G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_FAILURE_THRESHOLD) ) ||
- ( (G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_GPU_FAILURE_THRESHOLD) ) )
+ // Check if done everything possible to shed power and power still above a hard power cap
+ // ppb_fmax = Fmin and PWRSYS > Node power cap and
+ // Node power cap >= hard_min_pcap AND memory is throttled
+ if((g_amec->proc[0].pwr_votes.ppb_fmax == g_amec->sys.fmin) &&
+ (AMECSENSOR_PTR(PWRSYS)->sample > g_amec->pcap.active_node_pcap) &&
+ (g_amec->pcap.active_node_pcap >= G_sysConfigData.pcap.hard_min_pcap) &&
+ (g_amec->pcap.active_mem_level != 0) )
{
- TRAC_ERR("Failure to maintain power cap: Power Cap = %d ,"
- "PWRSYS = %d",g_amec->pcap.active_node_pcap,
- AMECSENSOR_PTR(PWRSYS)->sample);
+ // Check if we are to ignore pgpe errors meaning the PGPE cannot set frequency which could
+ // cause this over power event. This will not cover if a different OCC is not able to shed
+ // power due to PGPE which would require to add this status to occ-occ communication
+ if(ignore_pgpe_error())
+ {
+ // make sure count is cleared to give time for frequency to be set once PGPE can set it
+ G_over_cap_count = 0;
+ INCREMENT_ERR_HISTORY(ERRH_OVER_PCAP_IGNORED);
+ break;
+ }
- // Trace power per APSS channel to have the best breakdown for debug
- // compress traces to 4 max to save space on OP systems
- for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++)
+ G_over_cap_count++;
+
+ // GPUs take longer for power limit to take effect if GPUs are present need to use
+ // a longer wait time before logging an error and resetting
+ if( ( (!G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_FAILURE_THRESHOLD) ) ||
+ ( (G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_GPU_FAILURE_THRESHOLD) ) )
{
- l_apss_func_id = G_apss_ch_to_function[i];
+ TRAC_ERR("Failure to maintain power cap: Power Cap = %d ,"
+ "PWRSYS = %d",g_amec->pcap.active_node_pcap,
+ AMECSENSOR_PTR(PWRSYS)->sample);
- if((l_apss_func_id != ADC_RESERVED) &&
- (l_apss_func_id != ADC_12V_SENSE) &&
- (l_apss_func_id != ADC_GND_REMOTE_SENSE) &&
- (l_apss_func_id != ADC_12V_STANDBY_CURRENT) )
+ // Trace power per APSS channel to have the best breakdown for debug
+ // compress traces to 4 max to save space on OP systems
+ for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++)
{
- l_trace[l_trace_idx] = (i << 24) | (l_apss_func_id << 16) | (AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample);
- l_trace_idx++;
+ l_apss_func_id = G_apss_ch_to_function[i];
+
+ if((l_apss_func_id != ADC_RESERVED) &&
+ (l_apss_func_id != ADC_12V_SENSE) &&
+ (l_apss_func_id != ADC_GND_REMOTE_SENSE) &&
+ (l_apss_func_id != ADC_12V_STANDBY_CURRENT) )
+ {
+ l_trace[l_trace_idx] = (i << 24) | (l_apss_func_id << 16) | (AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample);
+ l_trace_idx++;
+ }
+ }
+ while(l_trace_idx != 0)
+ {
+ if(l_trace_idx >=4)
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X], [%08X]",
+ l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3], l_trace[l_trace_idx-4]);
+ l_trace_idx -= 4;
+ }
+ else if(l_trace_idx == 3)
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X]",
+ l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3]);
+ l_trace_idx = 0;
+ }
+ else if(l_trace_idx == 2)
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X]",
+ l_trace[l_trace_idx-1], l_trace[l_trace_idx-2]);
+ l_trace_idx = 0;
+ }
+ else // l_trace_idx == 1
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X]",
+ l_trace[l_trace_idx-1]);
+ l_trace_idx = 0;
+ }
}
- }
- while(l_trace_idx != 0)
- {
- if(l_trace_idx >=4)
- {
- TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X], [%08X]",
- l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3], l_trace[l_trace_idx-4]);
- l_trace_idx -= 4;
- }
- else if(l_trace_idx == 3)
- {
- TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X]",
- l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3]);
- l_trace_idx = 0;
- }
- else if(l_trace_idx == 2)
- {
- TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X]",
- l_trace[l_trace_idx-1], l_trace[l_trace_idx-2]);
- l_trace_idx = 0;
- }
- else // l_trace_idx == 1
- {
- TRAC_ERR("APSS channel/FuncID/Power: [%08X]",
- l_trace[l_trace_idx-1]);
- l_trace_idx = 0;
- }
- }
- /* @
- * @errortype
- * @moduleid AMEC_MST_CHECK_UNDER_PCAP
- * @reasoncode POWER_CAP_FAILURE
- * @userdata1 Power Cap
- * @userdata2 PWRSYS (Node Power)
- * @devdesc Failure to maintain max power limits
- *
- */
- l_err = createErrl( AMEC_MST_CHECK_UNDER_PCAP,
- POWER_CAP_FAILURE,
- ERC_AMEC_UNDER_PCAP_FAILURE,
- ERRL_SEV_PREDICTIVE,
- NULL,
- DEFAULT_TRACE_SIZE,
- g_amec->pcap.active_node_pcap,
- AMECSENSOR_PTR(PWRSYS)->sample);
-
- //Callout to firmware
- addCalloutToErrl(l_err,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_FIRMWARE,
- ERRL_CALLOUT_PRIORITY_HIGH);
-
- //Callout to APSS
- addCalloutToErrl(l_err,
- ERRL_CALLOUT_TYPE_HUID,
- G_sysConfigData.apss_huid,
- ERRL_CALLOUT_PRIORITY_HIGH);
-
- //Reset OCC
- REQUEST_RESET(l_err);
+ /* @
+ * @errortype
+ * @moduleid AMEC_MST_CHECK_UNDER_PCAP
+ * @reasoncode POWER_CAP_FAILURE
+ * @userdata1 Power Cap
+ * @userdata2 PWRSYS (Node Power)
+ * @devdesc Failure to maintain max power limits
+ *
+ */
+ l_err = createErrl( AMEC_MST_CHECK_UNDER_PCAP,
+ POWER_CAP_FAILURE,
+ ERC_AMEC_UNDER_PCAP_FAILURE,
+ ERRL_SEV_PREDICTIVE,
+ NULL,
+ DEFAULT_TRACE_SIZE,
+ g_amec->pcap.active_node_pcap,
+ AMECSENSOR_PTR(PWRSYS)->sample);
+
+ //Callout to firmware
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_FIRMWARE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+
+ //Callout to APSS
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ G_sysConfigData.apss_huid,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+
+ //Reset OCC
+ REQUEST_RESET(l_err);
+ }
}
- }
- else
- {
- // Clear counter
- G_over_cap_count = 0;
- }
-
+ else
+ {
+ // Clear counter
+ G_over_cap_count = 0;
+ }
+ }while(0);
return;
}
diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h
index e86a000..d253889 100755
--- a/src/occ_405/amec/amec_sys.h
+++ b/src/occ_405/amec/amec_sys.h
@@ -535,6 +535,10 @@ typedef struct
// Calculations & Interim Data
uint16_t core_max_freq; // Maximum requested freq for all cores on chip.
+ uint16_t core_min_freq; // for debug. Minimum requested freq for all cores on chip.
+ uint8_t current_clip_count; // for debug. #consecutive ticks core_min_freq is below max possible for same reason
+ uint32_t chip_f_reason_history; // for debug. bit mask history of all frequency reason(s) for the chip
+ uint32_t f_reason; // for debug. current reason across all cores driving the lowest f request
// Parameters used through Amester interface
// Note: keep core arrays here, not in per-cores structure so one parameter
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.c b/src/occ_405/cmdh/cmdh_fsp_cmds.c
index 90c6d3b..b1205f2 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds.c
@@ -682,6 +682,28 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
}
l_sensorHeader.count++;
+ l_extnSensorList[l_sensorHeader.count].name = EXTN_NAME_CLIP;
+ // get Pstate for the current minimum maximum frequency OCC is allowing
+ // actual frequency is driven down by the lowest max frequency across all cores
+ freq = g_amec->proc[0].core_min_freq;
+ if (freq > 0)
+ {
+ l_extnSensorList[l_sensorHeader.count].data[0] = proc_freq2pstate(freq);
+ }
+ else
+ {
+ l_extnSensorList[l_sensorHeader.count].data[0] = 0xFF;
+ }
+
+ // current counter will be 0 if not currently clipping
+ l_extnSensorList[l_sensorHeader.count].data[1] = g_amec->proc[0].current_clip_count;
+ // clip history reason
+ l_extnSensorList[l_sensorHeader.count].data[2] = CONVERT_UINT32_UINT8_UPPER_HIGH(g_amec->proc[0].chip_f_reason_history);
+ l_extnSensorList[l_sensorHeader.count].data[3] = CONVERT_UINT32_UINT8_UPPER_LOW(g_amec->proc[0].chip_f_reason_history);
+ l_extnSensorList[l_sensorHeader.count].data[4] = CONVERT_UINT32_UINT8_LOWER_HIGH(g_amec->proc[0].chip_f_reason_history);
+ l_extnSensorList[l_sensorHeader.count].data[5] = CONVERT_UINT32_UINT8_LOWER_LOW(g_amec->proc[0].chip_f_reason_history);
+ l_sensorHeader.count++;
+
// add any non-0 error history counts
for(l_err_hist_idx=0; l_err_hist_idx < ERR_HISTORY_SIZE; l_err_hist_idx++)
{
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.h b/src/occ_405/cmdh/cmdh_fsp_cmds.h
index feb424a..ae85339 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds.h
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds.h
@@ -69,6 +69,7 @@ typedef enum
#define EXTN_NAME_FNOM 0x464E4F4D // "FNOM"
#define EXTN_NAME_FTURBO 0x46540000 // "FT"
#define EXTN_NAME_FUTURBO 0x46555400 // "FUT"
+#define EXTN_NAME_CLIP 0x434C4950 // "CLIP"
#define EXTN_NAME_ERRHIST 0x45525248 // "ERRH"
#define MAX_EXTN_SENSORS 32
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
index e60f3c7..950466e 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
@@ -260,6 +260,7 @@ errlHndl_t data_store_freq_data(const cmdh_fsp_cmd_t * i_cmd_ptr,
uint32_t l_mode_data_sz;
uint16_t l_freq = 0;
uint16_t l_table[OCC_MODE_COUNT] = {0};
+ uint16_t l_pgpe_max_freq_mhz = (G_oppb.frequency_max_khz / 1000);
do
{
@@ -303,13 +304,13 @@ errlHndl_t data_store_freq_data(const cmdh_fsp_cmd_t * i_cmd_ptr,
break;
}
- // This should never happen but verify that nominal frequency is <= G_proc_fmax_mhz
- if(l_freq > G_proc_fmax_mhz)
+ // This should never happen but verify that nominal frequency is <= OPPB max
+ if(l_freq > l_pgpe_max_freq_mhz)
{
CMDH_TRAC_ERR("Nominal Frequency[%d] (MHz)) is higher than "
- "G_proc_fmax_mhz[%d], clipping Nominal Frequency",
- l_freq, G_proc_fmax_mhz);
- l_freq = G_proc_fmax_mhz;
+ "OPPB max[%d], clipping Nominal Frequency",
+ l_freq, l_pgpe_max_freq_mhz);
+ l_freq = l_pgpe_max_freq_mhz;
}
l_table[OCC_MODE_NOMINAL] = l_freq;
@@ -325,13 +326,13 @@ errlHndl_t data_store_freq_data(const cmdh_fsp_cmd_t * i_cmd_ptr,
l_table[OCC_MODE_NOMINAL]);
l_freq = l_table[OCC_MODE_NOMINAL];
}
- // Verify that turbo frequency is <= G_proc_fmax_mhz
- else if(l_freq > G_proc_fmax_mhz)
+ // Verify that turbo frequency is <= OPPB max
+ else if(l_freq > l_pgpe_max_freq_mhz)
{
CMDH_TRAC_ERR("Turbo Frequency[%d] (MHz)) is higher than "
- "G_proc_fmax_mhz[%d], clip Turbo Frequency",
- l_freq, G_proc_fmax_mhz);
- l_freq = G_proc_fmax_mhz;
+ "OPPB max[%d], clip Turbo Frequency",
+ l_freq, l_pgpe_max_freq_mhz);
+ l_freq = l_pgpe_max_freq_mhz;
}
l_table[OCC_MODE_TURBO] = l_freq;
CMDH_TRAC_INFO("Turbo frequency = %d MHz", l_freq);
@@ -351,13 +352,13 @@ errlHndl_t data_store_freq_data(const cmdh_fsp_cmd_t * i_cmd_ptr,
// Bytes 9-10 Ultr Turbo Frequency Point
l_freq = (l_buf[6] << 8 | l_buf[7]);
- // Verify that ultra turbo frequency is <= G_proc_fmax_mhz
- if(l_freq > G_proc_fmax_mhz)
+ // Verify that ultra turbo frequency is <= OPPB max
+ if(l_freq > l_pgpe_max_freq_mhz)
{
CMDH_TRAC_ERR("Ultra Turbo Frequency[%d] (MHz) is higher than PGPE's "
- "Max freq (G_proc_fmax_mhz[%d]) clip Ultra Turbo Frequency",
- l_freq, G_proc_fmax_mhz);
- l_freq = G_proc_fmax_mhz;
+ "Max freq (OPPB max[%d]) clip Ultra Turbo Frequency",
+ l_freq, l_pgpe_max_freq_mhz);
+ l_freq = l_pgpe_max_freq_mhz;
}
// Check if (H)TMGT will let WOF run, else clear flags
diff --git a/src/occ_405/cmdh/cmdh_snapshot.c b/src/occ_405/cmdh/cmdh_snapshot.c
index ecb9afc..b52858f 100755
--- a/src/occ_405/cmdh/cmdh_snapshot.c
+++ b/src/occ_405/cmdh/cmdh_snapshot.c
@@ -74,7 +74,6 @@ VOID cmdh_snapshot_find_oldest_newest(uint8_t *o_oldest,
(g_cmdh_snapshot_array[0].current_id == 0) &&
(g_cmdh_snapshot_array[CMDH_SNAPSHOT_MAX_INDEX].current_id == 0))
{
- TRAC_INFO("cmdh_snapshot_find_oldest_newest: Entry 0 is the oldest and newest");
*o_oldest = 0;
*o_newest = 0;
break;
@@ -141,7 +140,6 @@ ERRL_RC cmdh_snapshot_buffer_nonite(const cmdh_fsp_cmd_t *i_cmd_ptr,
// Check case where there are no snapshot buffers available.
if (g_cmdh_snapshot_cur_index == CMDH_SNAPSHOT_DEFAULT_CUR_INDEX)
{
- TRAC_INFO("cmdh_snapshot_buffer_nonite: No snapshot buffer available.");
break;
}
@@ -170,8 +168,6 @@ ERRL_RC cmdh_snapshot_buffer_nonite(const cmdh_fsp_cmd_t *i_cmd_ptr,
if (i == CMDH_SNAPSHOT_MAX)
{
- TRAC_INFO("cmdh_snapshot_buffer_nonite: Requested buffer:%u not found so sending back %u",
- l_cmd_ptr->requested_id, l_rsp_ptr->newest_id);
l_req_idx = l_newest;
}
}
@@ -319,8 +315,6 @@ errlHndl_t cmdh_snapshot_sync(const cmdh_fsp_cmd_t * i_cmd_ptr,
break;
}
- TRAC_INFO("cmdh_snapshot_sync: Snapshot buffer has been reset!");
-
l_resp_ptr->data_length[0] = 0;
l_resp_ptr->data_length[1] = 0;
G_rsp_status = 0;
@@ -370,8 +364,6 @@ void cmdh_snapshot_callback(void * arg)
if (g_cmdh_snapshot_reset)
{
- TRAC_INFO("cmdh_snapshot_callback: Initializing snapshot buffer and data.");
-
memset(g_cmdh_snapshot_array, 0, sizeof(g_cmdh_snapshot_array));
g_cmdh_snapshot_cur_id = 0;
memset(L_cim_buf,0,sizeof(cmdh_snapshot_buffer_t));
diff --git a/src/occ_405/common.c b/src/occ_405/common.c
index fd031e2..e7eb6d7 100755
--- a/src/occ_405/common.c
+++ b/src/occ_405/common.c
@@ -246,4 +246,32 @@ bool notify_host(const ext_intr_reason_t i_reason)
return notify_success;
}
+// Called prior to logging any error related to the PGPE or Pstate control
+// (e.g. PGPE communication failures, failure to maintain the power cap).
+// During prolonged droop events the PGPE can be non-responsive and there is no frequency control, so doing a pm reset will
+// not help. The PGPE sets the pm_reset_suppress bit in the OCC FLAGS register while in this condition so the OCC ignores such errors.
+// Returns true if the error should be ignored (pm_reset_suppress is set); a trace is written only when that state changes between calls.
+bool ignore_pgpe_error(void)
+{
+ static bool L_last_ignore_error = false;
+ bool l_ignore_error = false;
+ ocb_occflg_t occ_flags = {0};
+
+ // Read the OCC Flags register and check whether the pm_reset_suppress (ignore errors) bit is set
+ occ_flags.value = in32(OCB_OCCFLG);
+
+ if (occ_flags.fields.pm_reset_suppress == 1)
+ {
+ l_ignore_error = true;
+ }
+
+ // Trace only on a change from the previous call; the previous result is kept in the static L_last_ignore_error
+ if (L_last_ignore_error != l_ignore_error)
+ {
+ TRAC_ERR("ignore_pgpe_error: OCCFLG pm_reset_suppress was %d and is now %d", L_last_ignore_error, l_ignore_error);
+ L_last_ignore_error = l_ignore_error;
+ }
+
+ return l_ignore_error;
+}
diff --git a/src/occ_405/common.h b/src/occ_405/common.h
index 787af35..06ab5df 100644
--- a/src/occ_405/common.h
+++ b/src/occ_405/common.h
@@ -49,4 +49,7 @@ void task_misc_405_checks(task_t *i_self);
// Returns true if notification was sent, false if interrupt already outstanding
bool notify_host(const ext_intr_reason_t i_reason);
+// Returns true if PGPE error should be ignored
+bool ignore_pgpe_error(void);
+
#endif // _common_h
diff --git a/src/occ_405/errl/errl.h b/src/occ_405/errl/errl.h
index 1218b92..042841e 100755
--- a/src/occ_405/errl/errl.h
+++ b/src/occ_405/errl/errl.h
@@ -295,6 +295,10 @@ typedef enum {
ERRH_24X7_DISABLED = 0x18,
ERRH_CEFF_RATIO_VDD_EXCURSION = 0x19,
ERRH_AVSBUS_VDD_TEMPERATURE = 0x1A,
+ ERRH_OVER_PCAP_IGNORED = 0x1B,
+ ERRH_VFRT_TIMEOUT_IGNORED = 0x1C,
+ ERRH_WOF_CONTROL_TIMEOUT_IGNORED = 0x1D,
+ ERRH_PSTATE_CHANGE_IGNORED = 0x1E,
ERR_HISTORY_SIZE = 0x20
} ERR_HISTORY_INDEX;
diff --git a/src/occ_405/occ_service_codes.h b/src/occ_405/occ_service_codes.h
index 1c00381..f2d3417 100644
--- a/src/occ_405/occ_service_codes.h
+++ b/src/occ_405/occ_service_codes.h
@@ -287,7 +287,7 @@ enum occExtReasonCode
ERC_GPU_READ_PWR_LIMIT_FAILURE = 0x0101,
ERC_GPU_SET_PWR_LIMIT_FAILURE = 0x0102,
- ERC_STATE_FROM_ALL_TO_STB_FAILURE = 0x0123,
+ ERC_STATE_FROM_OBS_TO_ACT_FAILURE = 0x0123,
ERC_STATE_FROM_ACT_TO_CHR_FAILURE = 0x0124,
ERC_STATE_FROM_CHR_TO_ACT_FAILURE = 0x0125,
ERC_STATE_FROM_CHR_TO_OBS_FAILURE = 0x0126,
diff --git a/src/occ_405/occbuildname.c b/src/occ_405/occbuildname.c
index a6e6ec5..9248044 100755
--- a/src/occ_405/occbuildname.c
+++ b/src/occ_405/occbuildname.c
@@ -34,6 +34,6 @@ volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) =
#else
-volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /*<BuildName>*/ "op_occ_171215a\0" /*</BuildName>*/ ;
+volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /*<BuildName>*/ "op_occ_180115a\0" /*</BuildName>*/ ;
#endif
diff --git a/src/occ_405/pgpe/pgpe_interface.c b/src/occ_405/pgpe/pgpe_interface.c
index eaaa340..13e1957 100644
--- a/src/occ_405/pgpe/pgpe_interface.c
+++ b/src/occ_405/pgpe/pgpe_interface.c
@@ -37,6 +37,7 @@
#include "ssx.h"
#include "wof.h"
#include "amec_sys.h"
+#include "common.h" // For ignore_pgpe_error()
// Maximum waiting time (usec) for clip update IPC task
#define CLIP_UPDATE_TIMEOUT 100 // maximum waiting time (usec) for clip update IPC task
@@ -420,26 +421,29 @@ int pgpe_set_clip_blocking(Pstate i_pstate)
if(wait_time > CLIP_UPDATE_TIMEOUT)
{
// an earlier clip update IPC call has not completed, trace and log an error
- TRAC_ERR("pgpe_set_clip_blocking: clip update IPC task is not Idle");
-
- /*
- * @errortype
- * @moduleid PGPE_SET_CLIP_BLOCKING_MOD
- * @reasoncode PGPE_FAILURE
- * @userdata4 ERC_PGPE_CLIP_NOT_IDLE
- * @devdesc pgpe clip update not idle
- */
- err = createErrl(
- PGPE_SET_CLIP_BLOCKING_MOD, //ModId
- PGPE_FAILURE, //Reasoncode
- ERC_PGPE_CLIP_NOT_IDLE, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- 0, //Userdata1
- 0 //Userdata2
- );
-
+ // only trace and log an error if we are not to ignore
+ if(!ignore_pgpe_error())
+ {
+ TRAC_ERR("pgpe_set_clip_blocking: clip update IPC task is not Idle");
+
+ /*
+ * @errortype
+ * @moduleid PGPE_SET_CLIP_BLOCKING_MOD
+ * @reasoncode PGPE_FAILURE
+ * @userdata4 ERC_PGPE_CLIP_NOT_IDLE
+ * @devdesc pgpe clip update not idle
+ */
+ err = createErrl(
+ PGPE_SET_CLIP_BLOCKING_MOD, //ModId
+ PGPE_FAILURE, //Reasoncode
+ ERC_PGPE_CLIP_NOT_IDLE, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ 0, //Userdata1
+ 0 //Userdata2
+ );
+ }
rc = PGPE_FAILURE;
break;
}
@@ -475,26 +479,29 @@ int pgpe_set_clip_blocking(Pstate i_pstate)
{
if(wait_time > CLIP_UPDATE_TIMEOUT)
{
- TRAC_ERR("pgpe_set_clip_blocking: clip update IPC task timeout!");
-
- /*
- * @errortype
- * @moduleid PGPE_SET_CLIP_BLOCKING_MOD
- * @reasoncode GPE_REQUEST_TASK_TIMEOUT
- * @userdata4 OCC_NO_EXTENDED_RC
- * @devdesc pgpe clip update timeout
- */
- err = createErrl(
- PGPE_SET_CLIP_BLOCKING_MOD, //ModId
- GPE_REQUEST_TASK_TIMEOUT, //Reasoncode
- OCC_NO_EXTENDED_RC, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- 0, //Userdata1
- 0 //Userdata2
- );
-
+ // only trace and log an error if we are not to ignore
+ if(!ignore_pgpe_error())
+ {
+ TRAC_ERR("pgpe_set_clip_blocking: clip update IPC task timeout!");
+
+ /*
+ * @errortype
+ * @moduleid PGPE_SET_CLIP_BLOCKING_MOD
+ * @reasoncode GPE_REQUEST_TASK_TIMEOUT
+ * @userdata4 OCC_NO_EXTENDED_RC
+ * @devdesc pgpe clip update timeout
+ */
+ err = createErrl(
+ PGPE_SET_CLIP_BLOCKING_MOD, //ModId
+ GPE_REQUEST_TASK_TIMEOUT, //Reasoncode
+ OCC_NO_EXTENDED_RC, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ 0, //Userdata1
+ 0 //Userdata2
+ );
+ }
rc = GPE_REQUEST_TASK_TIMEOUT;
break;
}
@@ -514,7 +521,7 @@ int pgpe_set_clip_blocking(Pstate i_pstate)
// IPC task completed. check for errors
if ( G_clip_update_parms.msg_cb.rc != PGPE_RC_SUCCESS )
{
- // clip update IPC call has not completed, trace and log an error
+ // clip update IPC call failed, trace and log an error
TRAC_ERR("pgpe_set_clip_blocking: clip update IPC task "
"returned an error [0x%08X]",
G_clip_update_parms.msg_cb.rc);
@@ -542,10 +549,11 @@ int pgpe_set_clip_blocking(Pstate i_pstate)
}
} while (0);
+ // request reset
if(err)
{
REQUEST_RESET(err);
- }
+ }
return(rc);
}
@@ -577,35 +585,39 @@ int pgpe_clip_update(void)
{
if(l_wait_time > CLIP_UPDATE_TIMEOUT)
{
- // an earlier clip update IPC call has not completed, trace and log an error
- TRAC_ERR("pgpe_clip_update: clip update IPC task is not Idle");
-
- /*
- * @errortype
- * @moduleid PGPE_CLIP_UPDATE_MOD
- * @reasoncode PGPE_FAILURE
- * @userdata1 0
- * @userdata4 ERC_PGPE_CLIP_NOT_IDLE
- * @devdesc pgpe clip update not idle
- */
- err = createErrl(
- PGPE_CLIP_UPDATE_MOD, //ModId
- PGPE_FAILURE, //Reasoncode
- ERC_PGPE_CLIP_NOT_IDLE, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- 0, //Userdata1
- 0 //Userdata2
- );
+ // only trace and log an error if we are not to ignore
+ if(!ignore_pgpe_error())
+ {
+ // an earlier clip update IPC call has not completed, trace and log an error
+ TRAC_ERR("pgpe_clip_update: clip update IPC task is not Idle");
+
+ /*
+ * @errortype
+ * @moduleid PGPE_CLIP_UPDATE_MOD
+ * @reasoncode PGPE_FAILURE
+ * @userdata1 0
+ * @userdata4 ERC_PGPE_CLIP_NOT_IDLE
+ * @devdesc pgpe clip update not idle
+ */
+ err = createErrl(
+ PGPE_CLIP_UPDATE_MOD, //ModId
+ PGPE_FAILURE, //Reasoncode
+ ERC_PGPE_CLIP_NOT_IDLE, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ 0, //Userdata1
+ 0 //Userdata2
+ );
- // Callout firmware
- addCalloutToErrl(err,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_FIRMWARE,
- ERRL_CALLOUT_PRIORITY_HIGH);
+ // Callout firmware
+ addCalloutToErrl(err,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_FIRMWARE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
- commitErrl(&err);
+ commitErrl(&err);
+ }
ext_rc = ERC_PGPE_CLIP_NOT_IDLE;
break;
@@ -765,35 +777,38 @@ int pgpe_start_suspend(uint8_t action, PMCR_OWNER owner)
// be idle when called.
if(!async_request_is_idle(&G_start_suspend_req.request))
{
- TRAC_ERR("pgpe_start_suspend: Start suspend task NOT Idle");
-
- /*
- * @errortype
- * @moduleid PGPE_START_SUSPEND_MOD
- * @reasoncode PGPE_FAILURE
- * @userdata1 0
- * @userdata4 ERC_PGPE_START_SUSPEND_NOT_IDLE
- * @devdesc pgpe start suspend task not idle
- */
- err = createErrl(
- PGPE_START_SUSPEND_MOD, //ModId
- PGPE_FAILURE, //Reasoncode
- ERC_PGPE_START_SUSPEND_NOT_IDLE, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- 0, //Userdata1
- 0 //Userdata2
- );
+ // only trace and log an error if we are not to ignore
+ if(!ignore_pgpe_error())
+ {
+ TRAC_ERR("pgpe_start_suspend: Start suspend task NOT Idle");
- // Callout firmware
- addCalloutToErrl(err,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_FIRMWARE,
- ERRL_CALLOUT_PRIORITY_HIGH);
+ /*
+ * @errortype
+ * @moduleid PGPE_START_SUSPEND_MOD
+ * @reasoncode PGPE_FAILURE
+ * @userdata1 0
+ * @userdata4 ERC_PGPE_START_SUSPEND_NOT_IDLE
+ * @devdesc pgpe start suspend task not idle
+ */
+ err = createErrl(
+ PGPE_START_SUSPEND_MOD, //ModId
+ PGPE_FAILURE, //Reasoncode
+ ERC_PGPE_START_SUSPEND_NOT_IDLE, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ 0, //Userdata1
+ 0 //Userdata2
+ );
- commitErrl(&err);
+ // Callout firmware
+ addCalloutToErrl(err,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_FIRMWARE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+ commitErrl(&err);
+ }
ext_rc = ERC_PGPE_START_SUSPEND_NOT_IDLE;
}
@@ -895,36 +910,39 @@ int pgpe_pmcr_set(void)
// This check is a safety feature in case caller didn't check IPC is idle.
if(!async_request_is_idle(&G_pmcr_set_req.request))
{
- // an earlier PMCR update IPC call has not completed, trace and log an error
- TRAC_ERR("pgpe_pmcr_set: PMCR update IPC task is not Idle");
-
- /*
- * @errortype
- * @moduleid PGPE_PMCR_SET_MOD
- * @reasoncode PGPE_FAILURE
- * @userdata1 0
- * @userdata4 ERC_PGPE_SET_PMCR_NOT_IDLE
- * @devdesc pgpe pmcr set not idle
- */
- err = createErrl(
- PGPE_PMCR_SET_MOD, //ModId
- PGPE_FAILURE, //Reasoncode
- ERC_PGPE_SET_PMCR_NOT_IDLE, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- 0, //Userdata1
- 0 //Userdata2
- );
+ // an earlier PMCR update IPC call has not completed
+ // only trace and log an error if we are not to ignore
+ if(!ignore_pgpe_error())
+ {
+ TRAC_ERR("pgpe_pmcr_set: PMCR update IPC task is not Idle");
- // Callout firmware
- addCalloutToErrl(err,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_FIRMWARE,
- ERRL_CALLOUT_PRIORITY_HIGH);
+ /*
+ * @errortype
+ * @moduleid PGPE_PMCR_SET_MOD
+ * @reasoncode PGPE_FAILURE
+ * @userdata1 0
+ * @userdata4 ERC_PGPE_SET_PMCR_NOT_IDLE
+ * @devdesc pgpe pmcr set not idle
+ */
+ err = createErrl(
+ PGPE_PMCR_SET_MOD, //ModId
+ PGPE_FAILURE, //Reasoncode
+ ERC_PGPE_SET_PMCR_NOT_IDLE, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ 0, //Userdata1
+ 0 //Userdata2
+ );
- commitErrl(&err);
+ // Callout firmware
+ addCalloutToErrl(err,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_FIRMWARE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+ commitErrl(&err);
+ }
ext_rc = ERC_PGPE_SET_PMCR_NOT_IDLE;
break;
}
@@ -1007,7 +1025,7 @@ int set_nominal_pstate(void)
// Make sure the set PMCR task is idle.
if(!async_request_is_idle(&G_pmcr_set_req.request))
{
- TRAC_ERR("set_nominal_pstate: Set PMCR task not idle!");
+ TRAC_ERR("set_nominal_pstate: Set PMCR task not idle! OCCFLG[0x%08X]", in32(OCB_OCCFLG));
l_rc = ERC_PGPE_SET_PMCR_NOT_IDLE;
break;
}
@@ -1019,7 +1037,7 @@ int set_nominal_pstate(void)
// This should not be called if Pstate protocol is in transition
if(G_proc_pstate_status == PSTATES_IN_TRANSITION)
{
- TRAC_ERR("set_nominal_pstate: Pstate protocol in transtion!");
+ TRAC_ERR("set_nominal_pstate: Pstate protocol in transtion! OCCFLG[0x%08X]", in32(OCB_OCCFLG));
l_rc = ERC_PGPE_START_SUSPEND_NOT_IDLE;
break;
}
@@ -1037,7 +1055,7 @@ int set_nominal_pstate(void)
if((ssx_timebase_get() - l_start) > l_timeout)
{
l_rc = ERC_PGPE_TASK_TIMEOUT;
- TRAC_ERR("set_nominal_pstate: Timeout waiting for Pstates to be enabled");
+ TRAC_ERR("set_nominal_pstate: Timeout waiting for Pstates to be enabled! OCCFLG[0x%08X]", in32(OCB_OCCFLG));
break;
}
ssx_sleep(SSX_MICROSECONDS(10));
diff --git a/src/occ_405/proc/proc_data_control.c b/src/occ_405/proc/proc_data_control.c
index 891f80c..ebd907d 100755
--- a/src/occ_405/proc/proc_data_control.c
+++ b/src/occ_405/proc/proc_data_control.c
@@ -40,14 +40,19 @@
#include "rtls_service_codes.h"
#include "proc_pstate.h"
#include "occ_util.h"
+#include "common.h" // For ignore_pgpe_error()
-// The the GPE parameter fields for PGPE IPC calls.
+// The GPE parameter fields for PGPE IPC calls.
extern GPE_BUFFER(ipcmsg_clip_update_t G_clip_update_parms);
extern GPE_BUFFER(ipcmsg_set_pmcr_t G_pmcr_set_parms);
extern GpeRequest G_clip_update_req;
extern GpeRequest G_pmcr_set_req;
+// number of ticks to wait on clip/pmcr request to complete before checking to log an error
+// this must give the PGPE at least 1ms, doubling that time to 2ms to be safe
+#define SUPPRESS_PGPE_ERR_WAIT_TICKS 4 // 2ms
+
extern bool G_state_transition_occuring; // A state transition is currently going on?
// a global flag used by task_core_data_control() to indicate
@@ -68,9 +73,15 @@ bool G_allowPstates = FALSE;
void task_core_data_control( task_t * i_task )
{
errlHndl_t err = NULL; //Error handler
- static bool L_trace_logged = false; // trace logging to avoid unnecessarily repeatig logs
+ static bool L_trace_logged = false; // trace logging to avoid unnecessarily repeating logs
+ static bool L_current_timeout_recorded = FALSE;
Pstate l_pstate;
static uint64_t L_last = 0xFFFFFFFFFFFFFFFF;
+ static uint64_t L_ignore_wait_count = 0; // number of consecutive ticks IPC task failed
+ bool l_check_failure = false;
+ int l_request_is_idle = 0;
+ uint8_t l_request_rc = 0;
+ enum occExtReasonCode l_ext_rc = OCC_NO_EXTENDED_RC;
// Once a state transition process starts, task data control
// stops updating the PMCR/CLIPS updates, this way, the state
@@ -123,7 +134,7 @@ void task_core_data_control( task_t * i_task )
G_active_to_observation_ready = true;
}
}
- }
+ } // if in state transition
else
{
L_trace_logged = false;
@@ -131,24 +142,23 @@ void task_core_data_control( task_t * i_task )
if (G_allowPstates)
{
// perform Pstate/clip control if previous IPC call completed successfully
- // if not idle, ignore cycle
- // if an error was returned, log an error, and request reset
- if(G_sysConfigData.system_type.kvm) // OPAL system
+ if(G_sysConfigData.system_type.kvm) // OPAL system uses clip update request
{
+ l_request_is_idle = async_request_is_idle(&G_clip_update_req.request);
+ l_request_rc = G_clip_update_parms.msg_cb.rc;
+
// confirm that the clip update IPC from last cycle
// has successfully completed on PGPE (with no errors)
- if( async_request_is_idle(&G_clip_update_req.request) && //clip_update/set_clip_ranges completed
- (G_clip_update_parms.msg_cb.rc == PGPE_RC_SUCCESS) ) // with no errors
+ if( (l_request_is_idle) && //clip_update/set_clip_ranges completed
+ (l_request_rc == PGPE_RC_SUCCESS) ) // with no errors
{
//call PGPE IPC function to update the clips
pgpe_clip_update();
}
- else if(G_clip_update_parms.msg_cb.rc != PGPE_RC_SUCCESS)
+ else
{
- // an earlier clip update IPC call has not completed, trace and log an error
- TRAC_ERR("task_core_data_control: clip update IPC task returned an error, %d",
- G_clip_update_parms.msg_cb.rc);
-
+ l_check_failure = true;
+ l_ext_rc = ERC_PGPE_CLIP_FAILURE;
/*
* @errortype
* @moduleid RTLS_TASK_CORE_DATA_CONTROL_MOD
@@ -158,23 +168,15 @@ void task_core_data_control( task_t * i_task )
* @userdata4 ERC_PGPE_CLIP_FAILURE
* @devdesc pgpe clip update returned an error
*/
- err = createErrl(
- RTLS_TASK_CORE_DATA_CONTROL_MOD, //ModId
- PGPE_FAILURE, //Reasoncode
- ERC_PGPE_CLIP_FAILURE, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- G_clip_update_parms.msg_cb.rc, //Userdata1
- async_request_is_idle(&G_clip_update_req.request) //Userdata2
- );
}
}
else
{
- // NON OPAL System, OCC owns PMCR:
- if( async_request_is_idle(&G_pmcr_set_req.request) && // PMCR IPC from last TICK completed
- (G_pmcr_set_parms.msg_cb.rc == PGPE_RC_SUCCESS) ) // with no errors
+ // NON OPAL System, OCC owns PMCR and uses PMCR set request
+ l_request_is_idle = async_request_is_idle(&G_pmcr_set_req.request);
+ l_request_rc = G_pmcr_set_parms.msg_cb.rc;
+ if( (l_request_is_idle) && // PMCR IPC from last TICK completed
+ (l_request_rc == PGPE_RC_SUCCESS) ) // with no errors
{
//The previous Non-OPAL PGPE request succeeded
uint64_t pstateList = 0;
@@ -190,19 +192,17 @@ void task_core_data_control( task_t * i_task )
if (L_last != pstateList)
{
L_last = pstateList;
- TRAC_IMP("task_core_data_control: calling pmcr_set() w/pstates: 0x%08X%04X",
- WORD_HIGH(pstateList), WORD_LOW(pstateList)>>16);
+ TRAC_INFO("task_core_data_control: calling pmcr_set() w/pstates: 0x%08X%04X",
+ WORD_HIGH(pstateList), WORD_LOW(pstateList)>>16);
//call PGPE IPC function to update Pstates
pgpe_pmcr_set();
}
}
- else if(G_pmcr_set_parms.msg_cb.rc != PGPE_RC_SUCCESS)
+ else
{
- // an earlier clip update IPC call has not completed, trace and log an error
- TRAC_ERR("task_core_data_control: pstate update IPC task returned an error, %d",
- G_pmcr_set_parms.msg_cb.rc);
-
+ l_check_failure = true;
+ l_ext_rc = ERC_PGPE_SET_PMCR_FAILURE;
/*
* @errortype
* @moduleid RTLS_TASK_CORE_DATA_CONTROL_MOD
@@ -212,27 +212,69 @@ void task_core_data_control( task_t * i_task )
* @userdata4 ERC_PGPE_SET_PMCR_FAILURE
* @devdesc pgpe PMCR set returned an error
*/
+ }
+ }
+ } // if pstates allowed
+
+ // Common error handling for all systems
+ if(l_check_failure)
+ {
+ // an earlier clip update or PMCR set IPC call has not completed successfully
+ L_ignore_wait_count++;
+
+ // Only log the error if we are not to ignore PGPE errors and have
+ // waited enough time for the PGPE to give this indication
+ if(L_ignore_wait_count >= SUPPRESS_PGPE_ERR_WAIT_TICKS)
+ {
+ if(!ignore_pgpe_error())
+ {
+ TRAC_ERR("task_core_data_control: pstate update IPC task did not complete successfully, idle?[%d] rc[%08X]",
+ l_request_is_idle, l_request_rc);
+
err = createErrl(
RTLS_TASK_CORE_DATA_CONTROL_MOD, //ModId
PGPE_FAILURE, //Reasoncode
- ERC_PGPE_SET_PMCR_FAILURE, //Extended reason code
+ l_ext_rc, //Extended reason code
ERRL_SEV_PREDICTIVE, //Severity
NULL, //Trace Buf
DEFAULT_TRACE_SIZE, //Trace Size
- G_pmcr_set_parms.msg_cb.rc, //Userdata1
- async_request_is_idle(&G_pmcr_set_req.request) //Userdata2
+ l_request_rc, //Userdata1
+ l_request_is_idle //Userdata2
);
+
+ //Add firmware callout
+ addCalloutToErrl(err,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_FIRMWARE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+
+ //Add processor callout
+ addCalloutToErrl(err,
+ ERRL_CALLOUT_TYPE_HUID,
+ G_sysConfigData.proc_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
+
+ // commit error log
+ REQUEST_RESET(err);
+ }
+ else
+ {
+ // Wait forever for PGPE to respond
+ // Put a mark on the wall so we know we hit this state
+ if(!L_current_timeout_recorded)
+ {
+ INCREMENT_ERR_HISTORY(ERRH_PSTATE_CHANGE_IGNORED);
+ L_current_timeout_recorded = TRUE;
+ }
}
}
}
- // else pstates not allowed yet
-
- if(err)
+ else
{
- // commit error log
- REQUEST_RESET(err);
+ // no error, clear the error wait count
+ L_ignore_wait_count = 0;
+ L_current_timeout_recorded = FALSE;
}
- }
-
+ } // else not in a state transition
return;
}
diff --git a/src/occ_405/state.c b/src/occ_405/state.c
index 4c0e169..ea8219a 100755
--- a/src/occ_405/state.c
+++ b/src/occ_405/state.c
@@ -224,7 +224,8 @@ errlHndl_t SMGR_standby_to_characterization()
if(rc)
{
- TRAC_ERR("SMGR: failed to set pstate clips.");
+ TRAC_ERR("SMGR: failed to set pstate clips. rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // successfully set clips; enable pstates, then start transition
@@ -235,7 +236,8 @@ errlHndl_t SMGR_standby_to_characterization()
if(rc)
{
- TRAC_ERR("SMGR: failed to start the pstate protocol for char owner on PGPE.");
+ TRAC_ERR("SMGR: failed to start the pstate protocol for char owner on PGPE. rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // Clips set and pstates started successfully, start transition
@@ -318,7 +320,8 @@ errlHndl_t SMGR_all_to_standby()
// check for timeout while waiting for pgpe_start_suspend() IPC completion
if(wait_time >= WAIT_PGPE_TASK_TIMEOUT)
{
- TRAC_ERR("SMGR_all_to_standby: Timeout waiting for Pstates start/suspend IPC task");
+ TRAC_ERR("SMGR_all_to_standby: Timeout waiting for Pstates start/suspend IPC task. OCCFLG[0x%08X]",
+ in32(OCB_OCCFLG));
}
// Stop Pstates if enabled
else if(G_proc_pstate_status == PSTATES_ENABLED)
@@ -326,7 +329,8 @@ errlHndl_t SMGR_all_to_standby()
rc = pgpe_start_suspend(PGPE_ACTION_PSTATE_STOP, G_proc_pmcr_owner);
if(rc)
{
- TRAC_ERR("SMGR_all_to_standby: Failed to stop the pstate protocol on PGPE. rc[%08X]", rc);
+ TRAC_ERR("SMGR_all_to_standby: Failed to stop the pstate protocol on PGPE. rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
}
}
@@ -372,7 +376,8 @@ errlHndl_t SMGR_characterization_to_observation()
rc = pgpe_set_clip_blocking(l_pstate);
if(rc)
{
- TRAC_ERR("SMGR_char_to_obs: failed to set pstate clip to legacy turbo rc[%08X]", rc);
+ TRAC_ERR("SMGR_char_to_obs: failed to set pstate clip to legacy turbo rc[%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // clips set to legacy turbo; stop pstate protocol
@@ -380,7 +385,8 @@ errlHndl_t SMGR_characterization_to_observation()
rc = pgpe_start_suspend(PGPE_ACTION_PSTATE_STOP, G_proc_pmcr_owner);
if(rc)
{
- TRAC_ERR("SMGR_char_to_obs: Failed to stop pstate protocol rc[%08X]", rc);
+ TRAC_ERR("SMGR_char_to_obs: Failed to stop pstate protocol rc[%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // Clips tightened successfully, and pstates disabled: perform transition
@@ -452,13 +458,15 @@ errlHndl_t SMGR_observation_to_characterization()
"since OCC is not active ready.");
break;
}
+
// set pstate clips
l_pstate = proc_freq2pstate(G_proc_fmax_mhz);
rc = pgpe_set_clip_blocking(l_pstate);
if(rc)
{
- TRAC_ERR("SMGR_obs_to_char: failed to set pstate clips rc[%08X]", rc);
+ TRAC_ERR("SMGR_obs_to_char: failed to set pstate clips rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // successfully set clips; enable pstates, then start transition
@@ -468,7 +476,8 @@ errlHndl_t SMGR_observation_to_characterization()
if(rc)
{
- TRAC_ERR("SMGR_obs_to_char: failed to start pstate protocol rc[%08X]", rc);
+ TRAC_ERR("SMGR_obs_to_char: failed to start pstate protocol rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // Clips set successfully and pstates enabled; complete transition
@@ -527,6 +536,7 @@ errlHndl_t SMGR_observation_to_active()
static bool L_error_logged = FALSE; // To prevent trace and error log happened over and over
int l_extRc = OCC_NO_EXTENDED_RC;
int l_rc = 0;
+ uint32_t l_user_data = 0;
Pstate l_pstate;
// clear mnfg quad pstate request to default OCC to control all quads
@@ -559,7 +569,20 @@ errlHndl_t SMGR_observation_to_active()
if(l_rc)
{
- TRAC_ERR("SMGR_obs_to_active: Set Pstate clips failed rc[%08X]", l_rc);
+ /* @
+ * @errortype
+ * @moduleid MAIN_STATE_TRANSITION_MID
+ * @reasoncode INTERNAL_FAILURE
+ * @userdata1 OCB_OCCFLG
+ * @userdata2 l_rc
+ * @userdata4 ERC_PGPE_CLIP_FAILURE
+ * @devdesc Failure setting Pstate clips on observation to active transition
+ */
+
+ l_extRc = ERC_PGPE_CLIP_FAILURE;
+ l_user_data = in32(OCB_OCCFLG);
+ TRAC_ERR("SMGR_obs_to_active: Set Pstate clips failed rc[0x%08X] OCCFLG[0x%08X]",
+ l_rc, l_user_data);
break;
}
else // Clips set with no errors, enable Pstates on PGPE
@@ -609,7 +632,20 @@ errlHndl_t SMGR_observation_to_active()
if(l_rc)
{
- TRAC_ERR("SMGR_obs_to_active: Failed to start pstate protocol rc[%08X]", l_rc);
+ /* @
+ * @errortype
+ * @moduleid MAIN_STATE_TRANSITION_MID
+ * @reasoncode INTERNAL_FAILURE
+ * @userdata1 OCB_OCCFLG
+ * @userdata2 l_rc
+ * @userdata4 ERC_PGPE_START_SUSPEND_FAILURE
+ * @devdesc Failure enabling pstates on observation to active transition
+ */
+
+ l_extRc = ERC_PGPE_START_SUSPEND_FAILURE;
+ l_user_data = in32(OCB_OCCFLG);
+ TRAC_ERR("SMGR_obs_to_active: Failed to start pstate protocol rc[0x%08X] OCCFLG[0x%08X]",
+ l_rc, l_user_data);
break;
}
}
@@ -621,15 +657,24 @@ errlHndl_t SMGR_observation_to_active()
{
if ((ssx_timebase_get() - start) > timeout)
{
+ /* @
+ * @errortype
+ * @moduleid MAIN_STATE_TRANSITION_MID
+ * @reasoncode INTERNAL_FAILURE
+ * @userdata1 OCB_OCCFLG
+ * @userdata2 l_rc
+ * @userdata4 ERC_GENERIC_TIMEOUT
+ * @devdesc Timeout waiting for pstate enable on observation to active transition
+ */
+
l_rc = 1;
+ l_extRc = ERC_GENERIC_TIMEOUT;
+ l_user_data = in32(OCB_OCCFLG);
if(FALSE == L_error_logged)
{
TRAC_ERR("SMGR_obs_to_active: Timeout waiting for Pstates to be enabled, "
- "chips_present[%02x], Cores Present [%08x]",
- G_sysConfigData.is_occ_present,
- (uint32_t) ((in64(OCB_CCSR)) >> 32));
+ "OCCFLG[0x%08X]", l_user_data);
}
- l_extRc = ERC_GENERIC_TIMEOUT;
break;
}
ssx_sleep(SSX_MICROSECONDS(10));
@@ -660,35 +705,6 @@ errlHndl_t SMGR_observation_to_active()
{
TRAC_ERR("SMGR: Observation to Active Transition Failed, because pstates are not enabled");
}
-
- if(l_rc && FALSE == L_error_logged)
- {
- L_error_logged = TRUE;
- /* @
- * @errortype
- * @moduleid MAIN_STATE_TRANSITION_MID
- * @reasoncode INTERNAL_FAILURE
- * @userdata1 SMGR_MASK_ACTIVE_READY
- * @userdata2 valid states
- * @userdata4 ERC_GENERIC_TIMEOUT
- * @devdesc Failed changing from observation to active
- */
- l_errlHndl = createErrl(MAIN_STATE_TRANSITION_MID, //modId
- INTERNAL_FAILURE, //reasoncode
- l_extRc, //Extended reason code
- ERRL_SEV_UNRECOVERABLE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- SMGR_MASK_ACTIVE_READY, //userdata1
- SMGR_validate_get_valid_states());//userdata2
-
- // Callout firmware
- addCalloutToErrl(l_errlHndl,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_FIRMWARE,
- ERRL_CALLOUT_PRIORITY_HIGH);
- }
-
}
else // We have no cores configured
{
@@ -705,6 +721,19 @@ errlHndl_t SMGR_observation_to_active()
} // Active Ready
else
{
+ /* @
+ * @errortype
+ * @moduleid MAIN_STATE_TRANSITION_MID
+ * @reasoncode INTERNAL_FAILURE
+ * @userdata1 SMGR_MASK_ACTIVE_READY
+ * @userdata2 l_rc
+ * @userdata4 ERC_STATE_FROM_OBS_TO_ACT_FAILURE
+ * @devdesc Failed changing from observation to active due to OCC not ready
+ */
+
+ l_rc = 2;
+ l_extRc = ERC_STATE_FROM_OBS_TO_ACT_FAILURE;
+ l_user_data = SMGR_MASK_ACTIVE_READY;
TRAC_ERR("SMGR: Observation to Active Transition Failed, "
"OCC is not Active Ready cnfgdata=0x%08x, reqd=0x%08x",
DATA_get_present_cnfgdata(),
@@ -712,6 +741,24 @@ errlHndl_t SMGR_observation_to_active()
}
} while (0);
+ if(l_rc && (FALSE == L_error_logged))
+ {
+ L_error_logged = TRUE;
+ l_errlHndl = createErrl(MAIN_STATE_TRANSITION_MID, //modId
+ INTERNAL_FAILURE, //reasoncode
+ l_extRc, //Extended reason code
+ ERRL_SEV_UNRECOVERABLE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ l_user_data, //userdata1
+ l_rc); //userdata2
+
+ // Callout firmware
+ addCalloutToErrl(l_errlHndl,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_FIRMWARE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+ }
return l_errlHndl;
}
@@ -746,7 +793,8 @@ errlHndl_t SMGR_characterization_to_active()
}
if(rc)
{
- TRAC_ERR("SMGR_char_to_active: Failed to change PMCR ownership rc[%08X]", rc);
+ TRAC_ERR("SMGR_char_to_active: Failed to change PMCR ownership rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
@@ -762,7 +810,8 @@ errlHndl_t SMGR_characterization_to_active()
if ((ssx_timebase_get() - start) > timeout)
{
rc = 1;
- TRAC_ERR("SMGR_char_to_active: Timeout waiting for PMCR ownership change");
+ TRAC_ERR("SMGR_char_to_active: Timeout waiting for PMCR ownership change. OCCFLG[0x%08X]",
+ in32(OCB_OCCFLG));
break;
}
ssx_sleep(SSX_MICROSECONDS(10));
@@ -838,6 +887,7 @@ errlHndl_t SMGR_characterization_to_active()
errlHndl_t SMGR_active_to_observation()
{
int rc = 0;
+ enum occExtReasonCode ext_rc = OCC_NO_EXTENDED_RC;
errlHndl_t l_errlHndl = NULL;
uint8_t wait_time = 0;
@@ -864,7 +914,8 @@ errlHndl_t SMGR_active_to_observation()
// check for timeout while waiting for pgpe_start_suspend() IPC completion
if(wait_time > WAIT_PGPE_TASK_TIMEOUT)
{
- TRAC_ERR("SMGR_act_to_obs: Timeout waiting for G_active_to_observation_ready flag.");
+ TRAC_ERR("SMGR_act_to_obs: Timeout waiting for G_active_to_observation_ready flag. OCCFLG[0x%08X]",
+ in32(OCB_OCCFLG));
/* @
* @errortype
@@ -874,21 +925,7 @@ errlHndl_t SMGR_active_to_observation()
* @userdata4 ERC_PGPE_ACTIVE_TO_OBSERVATION_TIMEOUT
* @devdesc timeout waiting for pstates start/suspend task
*/
- l_errlHndl = createErrl(MAIN_STATE_TRANSITION_MID, //modId
- PGPE_FAILURE, //reasoncode
- ERC_PGPE_ACTIVE_TO_OBSERVATION_TIMEOUT, //Extended reason code
- ERRL_SEV_UNRECOVERABLE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- wait_time, //userdata1
- 0); //userdata2
-
- // Callout firmware
- addCalloutToErrl(l_errlHndl,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_FIRMWARE,
- ERRL_CALLOUT_PRIORITY_HIGH);
-
+ ext_rc = ERC_PGPE_ACTIVE_TO_OBSERVATION_TIMEOUT;
rc = PGPE_FAILURE;
break;
}
@@ -912,7 +949,8 @@ errlHndl_t SMGR_active_to_observation()
// check for timeout while waiting for Pstate clips IPC completion
if(wait_time > WAIT_PGPE_TASK_TIMEOUT)
{
- TRAC_ERR("SMGR_act_to_obs: Timeout waiting for clip update IPC task");
+ TRAC_ERR("SMGR_act_to_obs: Timeout waiting for clip update IPC task OCCFLG[0x%08X]",
+ in32(OCB_OCCFLG));
/* @
* @errortype
@@ -922,21 +960,7 @@ errlHndl_t SMGR_active_to_observation()
* @userdata4 ERC_PGPE_TASK_TIMEOUT
* @devdesc timeout waiting for pstates start/suspend task
*/
- l_errlHndl = createErrl(MAIN_STATE_TRANSITION_MID, //modId
- PGPE_FAILURE, //reasoncode
- ERC_PGPE_TASK_TIMEOUT, //Extended reason code
- ERRL_SEV_UNRECOVERABLE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- wait_time, //userdata1
- 0); //userdata2
-
- // Callout firmware
- addCalloutToErrl(l_errlHndl,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_FIRMWARE,
- ERRL_CALLOUT_PRIORITY_HIGH);
-
+ ext_rc = ERC_PGPE_TASK_TIMEOUT;
rc = PGPE_FAILURE;
break;
}
@@ -944,7 +968,10 @@ errlHndl_t SMGR_active_to_observation()
rc = pgpe_start_suspend(PGPE_ACTION_PSTATE_STOP, G_proc_pmcr_owner);
if(rc)
{
- TRAC_ERR("SMGR_act_to_obs: failed to stop the pstate protocol on PGPE.");
+ TRAC_ERR("SMGR_act_to_obs: failed to stop the pstate protocol on PGPE rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
+ ext_rc = ERC_PGPE_TASK_TIMEOUT;
+ rc = PGPE_FAILURE;
break;
}
else // Pstates Disabled and clips set successfully, perform state transition
@@ -968,6 +995,20 @@ errlHndl_t SMGR_active_to_observation()
if(rc)
{
TRAC_ERR("SMGR: Failed with rc = %d to switch to Observation state", rc);
+ l_errlHndl = createErrl(MAIN_STATE_TRANSITION_MID, //modId
+ rc, //reasoncode
+ ext_rc, //Extended reason code
+ ERRL_SEV_UNRECOVERABLE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ wait_time, //userdata1
+ 0); //userdata2
+
+ // Callout firmware
+ addCalloutToErrl(l_errlHndl,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_FIRMWARE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
}
else
{
@@ -1003,7 +1044,8 @@ errlHndl_t SMGR_active_to_characterization()
if(rc)
{
- TRAC_ERR("SMGR_act_to_char: failed to set pstate clips.");
+ TRAC_ERR("SMGR_act_to_char: failed to set pstate clips. rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // clips set successfully, keep pstates enabled, but change ownership
@@ -1012,7 +1054,8 @@ errlHndl_t SMGR_active_to_characterization()
if(rc)
{
- TRAC_ERR("SMGR: failed to change PMCR ownership.");
+ TRAC_ERR("SMGR: failed to change PMCR ownership. rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
else // Request successfully scheduled on PGPE now verify it completed
@@ -1025,7 +1068,8 @@ errlHndl_t SMGR_active_to_characterization()
if ((ssx_timebase_get() - start) > timeout)
{
rc = 1;
- TRAC_ERR("SMGR_active_to_char: Timeout waiting for PMCR ownership change");
+ TRAC_ERR("SMGR_active_to_char: Timeout waiting for PMCR ownership change. rc[0x%08X] OCCFLG[0x%08X]",
+ rc, in32(OCB_OCCFLG));
break;
}
ssx_sleep(SSX_MICROSECONDS(10));
diff --git a/src/occ_405/wof/wof.c b/src/occ_405/wof/wof.c
index f6d1050..9d58239 100644
--- a/src/occ_405/wof/wof.c
+++ b/src/occ_405/wof/wof.c
@@ -36,6 +36,7 @@
#include <wof.h>
#include <amec_freq.h>
#include <pgpe_interface.h>
+#include "common.h" // For ignore_pgpe_error()
//******************************************************************************
// External Globals
//******************************************************************************
@@ -132,6 +133,11 @@ void call_wof_main( void )
// Variable to ensure we do not keep trying to send the wof control
static bool L_wof_control_last_chance = false;
+ // Variable to keep track of logging timeouts being ignored
+ // Since WOF runs every 4ms, more than the required 1ms has already passed for the PGPE
+ // to set the bit that gives the ignore indication, so no additional timer is needed before checking
+ static bool L_current_timeout_recorded = false;
+
// Variable to keep track of PState enablement to prevent setting/clearing
// wof_disabled bit every iteration.
static uint8_t L_pstate_protocol_off = 0;
@@ -222,19 +228,29 @@ void call_wof_main( void )
if( (!async_request_is_idle(&G_wof_vfrt_req.request)) ||
(g_wof->vfrt_state != STANDBY) )
{
- if( L_vfrt_last_chance == 0 )
+ if( (L_vfrt_last_chance == 0) && (!ignore_pgpe_error()) )
{
INTR_TRAC_ERR("WOF Disabled!"
" Init VFRT request timeout");
set_clear_wof_disabled( SET, WOF_RC_VFRT_REQ_TIMEOUT);
}
- else
+ else if(L_vfrt_last_chance != 0)
{
INTR_TRAC_INFO("initial VFRT NOT idle."
" %d more chance(s)",
L_vfrt_last_chance );
L_vfrt_last_chance--;
}
+ else
+ {
+ // Wait forever for PGPE to respond
+ // Put a mark on the wall so we know we hit this state
+ if(!L_current_timeout_recorded)
+ {
+ INCREMENT_ERR_HISTORY(ERRH_VFRT_TIMEOUT_IGNORED);
+ L_current_timeout_recorded = TRUE;
+ }
+ }
}
break;
@@ -247,23 +263,36 @@ void call_wof_main( void )
enable_success = enable_wof();
if( !enable_success )
{
- if( L_wof_control_last_chance )
+ // Treat as an error only if not currently ignoring PGPE failures
+ if( L_wof_control_last_chance && (!ignore_pgpe_error()) )
{
INTR_TRAC_ERR("WOF Disabled! Control req timeout(1)");
set_clear_wof_disabled(SET, WOF_RC_CONTROL_REQ_TIMEOUT);
}
- else
+ else if(!L_wof_control_last_chance)
{
INTR_TRAC_ERR("One more chance for WOF "
"control request(1)");
L_wof_control_last_chance = true;
}
+ else
+ {
+ // Wait forever for PGPE to respond
+ // Put a mark on the wall so we know we hit this state
+ if(!L_current_timeout_recorded)
+ {
+ INCREMENT_ERR_HISTORY(ERRH_WOF_CONTROL_TIMEOUT_IGNORED);
+ L_current_timeout_recorded = TRUE;
+ }
+ }
}
else
{
// Reset the last chance variable
// Init state updated in enable_wof
L_wof_control_last_chance = false;
+
+ L_current_timeout_recorded = FALSE;
}
break;
@@ -271,17 +300,32 @@ void call_wof_main( void )
// check if request is still processing.
if( !async_request_is_idle(&G_wof_control_req.request) )
{
- if( L_wof_control_last_chance )
+ // Treat as an error only if not currently ignoring PGPE failures
+ if( L_wof_control_last_chance && (!ignore_pgpe_error()) )
{
INTR_TRAC_ERR("WOF Disabled! Control req timeout(2)");
set_clear_wof_disabled(SET, WOF_RC_CONTROL_REQ_TIMEOUT);
}
- else
+ else if(!L_wof_control_last_chance)
{
INTR_TRAC_ERR("One more chance for WOF "
"control request(2)");
L_wof_control_last_chance = true;
}
+ else
+ {
+ // Wait forever for PGPE to respond
+ // Put a mark on the wall so we know we hit this state
+ if(!L_current_timeout_recorded)
+ {
+ INCREMENT_ERR_HISTORY(ERRH_WOF_CONTROL_TIMEOUT_IGNORED);
+ L_current_timeout_recorded = TRUE;
+ }
+ }
+ }
+ else
+ {
+ L_current_timeout_recorded = FALSE;
}
// Init state updated in wof_control_callback
break;
@@ -303,8 +347,22 @@ void call_wof_main( void )
{
if( L_vfrt_last_chance == 0 )
{
- INTR_TRAC_ERR("WOF Disabled! VFRT req timeout");
- set_clear_wof_disabled(SET,WOF_RC_VFRT_REQ_TIMEOUT);
+ // Treat as an error only if not currently ignoring PGPE failures
+ if(!ignore_pgpe_error())
+ {
+ INTR_TRAC_ERR("WOF Disabled! VFRT req timeout");
+ set_clear_wof_disabled(SET,WOF_RC_VFRT_REQ_TIMEOUT);
+ }
+ else
+ {
+ // Wait forever for PGPE to respond
+ // Put a mark on the wall so we know we hit this state
+ if(!L_current_timeout_recorded)
+ {
+ INCREMENT_ERR_HISTORY(ERRH_VFRT_TIMEOUT_IGNORED);
+ L_current_timeout_recorded = TRUE;
+ }
+ }
}
else
{
@@ -315,6 +373,8 @@ void call_wof_main( void )
}
else
{
+ L_current_timeout_recorded = FALSE;
+
// Request is idle. Run wof algorithm
wof_main();
OpenPOWER on IntegriCloud