diff options
author | Stephan Broyles <sbroyles@us.ibm.com> | 2014-11-05 19:09:37 -0600 |
---|---|---|
committer | Stephan Broyles <sbroyles@us.ibm.com> | 2014-11-05 19:22:32 -0600 |
commit | 9976c207cdb20871880bd2f4cf123cf4cb6a8b0f (patch) | |
tree | 1cf9ed8f23085e6fe3e0e6046fc30dcb7e02ccf2 /src/occ/cent | |
parent | 2f8ce357b89d361b5091d88aea91416011b73ccb (diff) | |
download | talos-occ-9976c207cdb20871880bd2f4cf123cf4cb6a8b0f.tar.gz talos-occ-9976c207cdb20871880bd2f4cf123cf4cb6a8b0f.zip |
Added remaining occ files.
Change-Id: I91a748d3dcf3161a6a3eedcb376fcaf1e4dfe655
Diffstat (limited to 'src/occ/cent')
-rwxr-xr-x | src/occ/cent/centaur_control.c | 698 | ||||
-rwxr-xr-x | src/occ/cent/centaur_control.h | 100 | ||||
-rwxr-xr-x | src/occ/cent/centaur_data.c | 1194 | ||||
-rwxr-xr-x | src/occ/cent/centaur_data.h | 180 | ||||
-rwxr-xr-x | src/occ/cent/centaur_data_service_codes.h | 64 |
5 files changed, 2236 insertions, 0 deletions
diff --git a/src/occ/cent/centaur_control.c b/src/occ/cent/centaur_control.c new file mode 100755 index 0000000..1a154fd --- /dev/null +++ b/src/occ/cent/centaur_control.c @@ -0,0 +1,698 @@ +/****************************************************************************** +// @file centaur_control.c +// @brief Control of Centaur Chips/Procedures/Data. +*/ +/****************************************************************************** + * + * @page ChangeLogs Change Logs + * @section _centaur_control_c centaur_control.c + * @verbatim + * + * Flag Def/Fea Userid Date Description + * ------- ---------- -------- ---------- ---------------------------------- + * @th031 878471 thallet 04/15/2013 Centaur Throttles + * @th045 893135 thallet 07/26/2013 Updated for new Centaur Procedures + * @gm006 SW224414 milesg 09/16/2013 Reset and FFDC improvements + * @rt001 901927 tapiar 10/03/2013 Fix src tags + * @gm016 909061 milesg 12/10/2013 Support memory throttling due to temperature + * @gm017 909636 milesg 12/17/2013 Changes from mem throttle review + * @gm023 913865 milesg 01/31/2014 Centaur GPE times out + * @wb001 919163 wilbryan 03/06/2014 Updating error call outs, descriptions, and severities + * + * @endverbatim + * + *///*************************************************************************/ + +//************************************************************************* +// Includes +//************************************************************************* +#include "centaur_control.h" +#include "centaur_data.h" +#include "pgp_async.h" +#include "threadSch.h" +#include "pmc_register_addresses.h" +#include "centaur_data_service_codes.h" +#include "occ_service_codes.h" +#include "errl.h" +#include "trac.h" +#include "rtls.h" +#include "apss.h" +#include "state.h" +#include "gpe_scom.h" +#include "centaur_firmware_registers.h" +#include "centaur_register_addresses.h" +#include "amec_sys.h" + +//************************************************************************* +// Externs +//************************************************************************* + +//************************************************************************* +// Macros +//************************************************************************* + +//************************************************************************* +// Defines/Enums +//************************************************************************* + +// Used for Centaur Initialization of Registers +typedef enum +{ + NM_THROTTLE_MBA01 = 0, + NM_THROTTLE_MBA23 = 1, + MBS_THROTTLE_SYNC = 2, + NUM_CENT_THROTTLE_SCOMS = 3, +} eCentaurThrottleRegs; + + +//************************************************************************* +// Structures +//************************************************************************* + +//************************************************************************* +// Globals +//************************************************************************* + +//Pore flex request for the GPE job that is used for centaur init. +PoreFlex G_centaur_control_pore_req; +GPE_BUFFER(GpeScomParms G_centaur_control_reg_parms); +GPE_BUFFER(scomList_t G_centaurThrottle[NUM_CENT_THROTTLE_SCOMS]); + +//Centaur structures used for task data pointers. +centaur_control_task_t G_centaur_control_task = +{ + .startCentaur = 0, // First Centaur + .prevCentaur = 7, // Previous Centaur written to + .curCentaur = 0, // Current Centaur + .endCentaur = 7, // Last Centaur + .traceThresholdFlags = 0, // Trace Throttle Flags +}; + +// Per-MBA N/M Throttle Register "M & N" Values +centaur_throttle_t G_centaurThrottleLimits[MAX_NUM_CENTAURS][NUM_MBAS_PER_CENTAUR]; + +//bitmap of configured MBA's (2 per centaur, lsb is centaur 0/mba 0) +uint16_t G_configured_mbas = 0; + +//************************************************************************* +// Function Prototypes +//************************************************************************* + +//************************************************************************* +// Functions +//************************************************************************* + +//gm016 +////////////////////////// +// Function Specification +// +// Name: centaurThrottle_convert2Numerator +// +// Description: Converts a throttle percentage into an 'N' value that can +// be written to the hardware. +// +// +// Flow: ???? FN= ???? +// +// Thread: RTL +// +// End Function Specification +uint16_t centaurThrottle_convert2Numerator(uint16_t i_throttle, uint8_t i_cent, uint8_t i_mba) +{ +#define CENTAUR_THROTTLE_100_PERCENT_VALUE 1000 + + uint32_t l_nvalue = 0; + centaur_throttle_t* l_mba = &G_centaurThrottleLimits[i_cent][i_mba]; + + if(MBA_CONFIGURED(i_cent, i_mba)) + { + // Convert the throttle ( actually in units of 0.1 %) to a "N" value + l_nvalue = (l_mba->max_n_per_mba * i_throttle) / + CENTAUR_THROTTLE_100_PERCENT_VALUE; + + //Clip to per-mba min and max values + if(l_nvalue < l_mba->min_n_per_mba) + { + l_nvalue = l_mba->min_n_per_mba; + } + if(l_nvalue > l_mba->max_n_per_mba) + { + l_nvalue = l_mba->max_n_per_mba; + } + } + + return (uint16_t)l_nvalue; +} + +////////////////////////// +// Function Specification +// +// Name: cent_update_nlimits +// +// Description: Updates the memory throttle settings for; +// 1) new settings from FSP +// 2) change to/from TURBO or DPS mode +// 3) enter/exit oversubscription +// +// +// Flow: ???? FN= ???? +// +// Thread: RTL +// +// End Function Specification +#define CENT_TRACE_THROTTLE_DELAY 8 +void cent_update_nlimits(uint32_t i_cent) +{ + /*------------------------------------------------------------------------*/ + /* Local Variables */ + /*------------------------------------------------------------------------*/ + static uint32_t L_trace_throttle_count = 0; + uint16_t l_mba01_mba_maxn, l_mba01_chip_maxn, l_mba23_mba_maxn, l_mba23_chip_maxn; + /*------------------------------------------------------------------------*/ + /* Code */ + /*------------------------------------------------------------------------*/ + + do + { + centaur_throttle_t* l_active_limits01 = + &G_centaurThrottleLimits[i_cent][0]; + centaur_throttle_t* l_active_limits23 = + &G_centaurThrottleLimits[i_cent][1]; + mem_throt_config_data_t* l_state_limits01 = + &G_sysConfigData.mem_throt_limits[i_cent][0]; + mem_throt_config_data_t* l_state_limits23 = + &G_sysConfigData.mem_throt_limits[i_cent][1]; + + //Minimum N value is not state dependent + l_active_limits01->min_n_per_mba = l_state_limits01->min_ot_n_per_mba; + l_active_limits23->min_n_per_mba = l_state_limits23->min_ot_n_per_mba; + + //oversubscription? + if(AMEC_INTF_GET_OVERSUBSCRIPTION()) + { + l_mba01_mba_maxn = l_state_limits01->ovs_n_per_mba; + l_mba01_chip_maxn = l_state_limits01->ovs_n_per_chip; + l_mba23_mba_maxn = l_state_limits23->ovs_n_per_mba; + l_mba23_chip_maxn = l_state_limits23->ovs_n_per_chip; + } + else if(CURRENT_MODE() == OCC_MODE_NOMINAL) + { + l_mba01_mba_maxn = l_state_limits01->nom_n_per_mba; + l_mba01_chip_maxn = l_state_limits01->nom_n_per_chip; + l_mba23_mba_maxn = l_state_limits23->nom_n_per_mba; + l_mba23_chip_maxn = l_state_limits23->nom_n_per_chip; + } + else //DPS, TURBO, FFO, and SPS modes will use these settings + { + l_mba01_mba_maxn = l_state_limits01->turbo_n_per_mba; + l_mba01_chip_maxn = l_state_limits01->turbo_n_per_chip; + l_mba23_mba_maxn = l_state_limits23->turbo_n_per_mba; + l_mba23_chip_maxn = l_state_limits23->turbo_n_per_chip; + } + + l_active_limits01->max_n_per_chip = l_mba01_chip_maxn; + l_active_limits23->max_n_per_chip = l_mba23_chip_maxn; + + //Trace when the MBA max N value changes + if((l_mba01_mba_maxn != l_active_limits01->max_n_per_mba) || + (l_mba23_mba_maxn != l_active_limits23->max_n_per_mba)) + { + l_active_limits01->max_n_per_mba = l_mba01_mba_maxn; + l_active_limits23->max_n_per_mba = l_mba23_mba_maxn; + + //Don't trace every MBA changing, just one + if(!L_trace_throttle_count) + { + L_trace_throttle_count = CENT_TRACE_THROTTLE_DELAY; + TRAC_IMP("New MBA throttle max|min N values: mba01[0x%08x], mba23[0x%08x]", + (uint32_t)((l_mba01_mba_maxn << 16) | l_active_limits01->min_n_per_mba), + (uint32_t)((l_mba23_mba_maxn << 16) | l_active_limits23->min_n_per_mba)); + break; + } + } + + if(L_trace_throttle_count) + { + L_trace_throttle_count--; + } + + }while(0); +} + + +// Function Specification +// +// Name: task_centaur_control +// +// Description: Collect centaur data. The task is used for centaur data +// collection +// +// Flow: 02/27/13 FN=task_centaur_control +// +// End Function Specification +#define CENTAUR_CONTROL_SCOM_TIMEOUT 16 //wait up to 16 ticks before logging timeout failure +void task_centaur_control( task_t * i_task ) +{ + errlHndl_t l_err = NULL; // Error handler + int rc = 0; // Return code + uint32_t l_cent; + amec_centaur_t *l_cent_ptr = NULL; + static uint8_t L_scom_timeout[MAX_NUM_CENTAURS] = {0}; //track # of consecutive failures + static bool L_gpe_scheduled = FALSE; + static uint8_t L_gpe_fail_logged = 0; + static bool L_gpe_idle_traced = FALSE; + static bool L_gpe_had_1_tick = FALSE; + + // Pointer to the task data structure + centaur_control_task_t * l_centControlTask = + (centaur_control_task_t *) i_task->data_ptr; + + + // Pointer to parameter field for GPE request + GpeScomParms * l_parms = + (GpeScomParms *)(l_centControlTask->gpe_req.parameter); + + do + { + l_cent = l_centControlTask->curCentaur; + l_cent_ptr = &g_amec->proc[0].memctl[l_cent].centaur; + + //First, check to see if the previous GPE request still running + //A request is considered idle if it is not attached to any of the + //asynchronous request queues + if( !(async_request_is_idle(&l_centControlTask->gpe_req.request)) ) + { + L_scom_timeout[l_cent]++; + //This can happen due to variability in when the task runs + if(!L_gpe_idle_traced && L_gpe_had_1_tick) + { + TRAC_INFO("task_centaur_control: GPE is still running. cent[%d]", l_cent); + l_centControlTask->traceThresholdFlags |= CENTAUR_CONTROL_GPE_STILL_RUNNING; + L_gpe_idle_traced = TRUE; + } + L_gpe_had_1_tick = TRUE; + break; + } + else + { + //Request is idle + L_gpe_had_1_tick = FALSE; + if(L_gpe_idle_traced) + { + TRAC_INFO("task_centaur_control: GPE completed. cent[%d]", l_cent); + L_gpe_idle_traced = FALSE; + } + } + + //check scom status + if(L_gpe_scheduled) + { + if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc) + { + + if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent))) + { + L_gpe_fail_logged |= CENTAUR0_PRESENT_MASK >> l_cent; + TRAC_ERR("task_centaur_control: gpe_scom_centaur failed. l_cent=%d rc=%x, index=0x%08x", l_cent, l_parms->rc, l_parms->errorIndex); + + /* @ + * @errortype + * @moduleid CENT_TASK_CONTROL_MOD + * @reasoncode CENT_SCOM_ERROR + * @userdata1 rc - Return code of scom operation + * @userdata2 index of scom operation that failed + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc OCC access to centaur failed + */ + l_err = createErrl( + CENT_TASK_CONTROL_MOD, // modId + CENT_SCOM_ERROR, // reasoncode + OCC_NO_EXTENDED_RC, // Extended reason code + ERRL_SEV_PREDICTIVE, // Severity + NULL, // Trace Buf + DEFAULT_TRACE_SIZE, // Trace Size + l_parms->rc, // userdata1 + l_parms->errorIndex // userdata2 + ); + + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &(l_centControlTask->gpe_req.ffdc), //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + //callout the centaur + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.centaur_huids[l_cent], + ERRL_CALLOUT_PRIORITY_MED); + + //callout the processor + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_MED); + + commitErrl(&l_err); + }//if(l_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent)) + + //Request failed. Keep count of failures and request a reset if we reach a + //max retry count + L_scom_timeout[l_cent]++; + if(L_scom_timeout[l_cent] == CENTAUR_CONTROL_SCOM_TIMEOUT) + { + break; + } + + }//if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc) + else + { + //request completed successfully. reset the timeout. + L_scom_timeout[l_cent] = 0; + } + }//if(L_gpe_scheduled) + + //The previous GPE job completed. Now get ready for the next job. + L_gpe_scheduled = FALSE; + + //Update current centaur if we didn't fail + if ( l_cent >= l_centControlTask->endCentaur ) + { + l_cent = l_centControlTask->startCentaur; + } + else + { + l_cent++; + } + l_centControlTask->curCentaur = l_cent; + l_cent_ptr = &g_amec->proc[0].memctl[l_cent].centaur; + + //If centaur is not present or neither MBA is configured then skip it. + if(!CENTAUR_PRESENT(l_cent) || + (!MBA_CONFIGURED(l_cent, 0) && !MBA_CONFIGURED(l_cent, 1))) + { + break; + } + + //update min/max settings for both MBA's according to ovs and mode + cent_update_nlimits(l_cent); + + //calculate new N values + centaur_mba_farb3qn_t l_mbafarbq; + uint16_t l_mba01_n_per_mba = + centaurThrottle_convert2Numerator(g_amec->mem_speed_request, l_cent, 0); + uint16_t l_mba23_n_per_mba = + centaurThrottle_convert2Numerator(g_amec->mem_speed_request, l_cent, 1); + uint16_t l_mba01_n_per_chip = G_centaurThrottleLimits[l_cent][0].max_n_per_chip; + uint16_t l_mba23_n_per_chip = G_centaurThrottleLimits[l_cent][1].max_n_per_chip; + amec_cent_mem_speed_t l_mba01_speed; + amec_cent_mem_speed_t l_mba23_speed; + + //combine chip and mba settings (16 bit) in to a single 32bit value + l_mba01_speed.mba_n = l_mba01_n_per_mba; + l_mba01_speed.chip_n = l_mba01_n_per_chip; + l_mba23_speed.mba_n = l_mba23_n_per_mba; + l_mba23_speed.chip_n = l_mba23_n_per_chip; + + + // Check if the throttle value has been updated since the last + // time we sent it. If it has, then send a new value, otherwise + // do nothing. + if ( ( l_mba01_speed.word32 == l_cent_ptr->portpair[0].last_mem_speed_sent.word32 ) && + ( l_mba23_speed.word32 == l_cent_ptr->portpair[1].last_mem_speed_sent.word32 ) + ) + { + break; + } + + //TRAC_INFO("task_centaur_control: New centaur[%d] throttle values mba01[0x%08x], mba23[0x%08x], throt[%d] ", + // l_cent, + // l_mba01_speed.word32, + // l_mba23_speed.word32, + // g_amec->mem_speed_request); + + /// Set up Centuar Scom Registers - array of Scoms + /// [0]: N/M Throttle MBA01 + /// [1]: N/M Throttle MBA23 + /// [2]: MB SYNC + + //only write to MBA01 if configured + if(MBA_CONFIGURED(l_cent, 0)) + { + /// [0]: Set up N/M throttle MBA01 + G_centaurThrottle[NM_THROTTLE_MBA01].commandType = GPE_SCOM_RMW; + G_centaurThrottle[NM_THROTTLE_MBA01].instanceNumber = l_cent; + // Set up value to be written + l_mbafarbq.fields.cfg_nm_n_per_mba = l_mba01_n_per_mba; + l_mbafarbq.fields.cfg_nm_n_per_chip = l_mba01_n_per_chip; + G_centaurThrottle[NM_THROTTLE_MBA01].data = l_mbafarbq.value; + } + else + { + G_centaurThrottle[NM_THROTTLE_MBA01].commandType = GPE_SCOM_NOP; + } + + //only write to MBA23 if configured + if(MBA_CONFIGURED(l_cent, 1)) + { + /// [1]: Set up N/M throttle MBA23 + G_centaurThrottle[NM_THROTTLE_MBA23].commandType = GPE_SCOM_RMW; + G_centaurThrottle[NM_THROTTLE_MBA23].instanceNumber = l_cent; + // Set up value to be written + l_mbafarbq.fields.cfg_nm_n_per_mba = l_mba23_n_per_mba; + l_mbafarbq.fields.cfg_nm_n_per_chip = l_mba23_n_per_chip; + G_centaurThrottle[NM_THROTTLE_MBA23].data = l_mbafarbq.value; + } + else + { + G_centaurThrottle[NM_THROTTLE_MBA23].commandType = GPE_SCOM_NOP; + } + + + /// [2]: Set up the SYNC + /// + /// See Section 2.1.4.1 Centaur Sync Operations in the + /// Pgp Memory Controller Workbook to determine how to fill + /// in the data field: + /// https://farm0125.rtp.stglabs.ibm.com/ServerASIC/Centaur/PgP_MCS_Unit/MC_Workbook?action=AttachFile&do=get&target=p8mc_dd1_wb_061213.pdf + /// 0:7 select mask of MCS units + /// 8:15 select the sync type (12 = N/M throttle) + /// 57:63 must be zeros to address DW0 in cacheline + //G_centaurThrottle[MBS_THROTTLE_SYNC].commandType = GPE_SCOM_NOP; + G_centaurThrottle[MBS_THROTTLE_SYNC].commandType = GPE_SCOM_CENTAUR_SYNC_ALL; + G_centaurThrottle[MBS_THROTTLE_SYNC].data = CENTAUR_RESET_N_M_THROTTLE_COUNTER_SYNC | + CENTAUR_MYSTERY_SYNC; //This is the "PC" sync bit + + /// Set up GPE parameters + l_parms->scomList = (uint32_t) (&G_centaurThrottle); + l_parms->entries = 3; + l_parms->options = 0; + l_parms->rc = 0; + l_parms->errorIndex = 0; + + // Update the last sent throttle value, this will get + // cleared if the GPE does not complete successfully. + l_cent_ptr->portpair[0].last_mem_speed_sent.word32 = l_mba01_speed.word32; + l_cent_ptr->portpair[1].last_mem_speed_sent.word32 = l_mba23_speed.word32; + + + // Pore flex schedule gpe_scom_centaur + // Check pore_flex_schedule return code if error + // then request OCC reset. + rc = pore_flex_schedule( &(l_centControlTask->gpe_req) ); + if( rc ) + { + //Error in schedule gpe get centaur data + TRAC_ERR("task_centaur_control: Failed to schedule gpe rc=%x", rc); + + /* @ + * @errortype + * @moduleid CENT_TASK_CONTROL_MOD + * @reasoncode SSX_GENERIC_FAILURE + * @userdata1 rc - Return code of failing function + * @userdata2 0 + * @userdata4 ERC_CENTAUR_PORE_FLEX_SCHEDULE_FAILURE + * @devdesc OCC Failed to schedule a GPE job for centaur + */ + l_err = createErrl( + CENT_TASK_CONTROL_MOD, // modId + SSX_GENERIC_FAILURE, // reasoncode + ERC_CENTAUR_PORE_FLEX_SCHEDULE_FAILURE, // Extended reason code + ERRL_SEV_UNRECOVERABLE, // Severity + NULL, // Trace Buf + DEFAULT_TRACE_SIZE, // Trace Size + rc, // userdata1 + l_parms->rc // userdata2 + ); + + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &(l_centControlTask->gpe_req.ffdc), //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + REQUEST_RESET(l_err); //This will add a firmware callout for us + break; + } + + L_gpe_scheduled = TRUE; + } while(0); + + if(L_scom_timeout[l_cent] == CENTAUR_CONTROL_SCOM_TIMEOUT) + { + TRAC_ERR("task_centaur_control: Timeout scomming centaur[%d]", l_cent); + /* @ + * @errortype + * @moduleid CENT_TASK_CONTROL_MOD + * @reasoncode INTERNAL_FAILURE + * @userdata1 centaur number + * @userdata2 0 + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc Timed out trying to set the memory throttle settings + * throttle settings. + */ + l_err = createErrl( + CENT_TASK_CONTROL_MOD, // modId + INTERNAL_FAILURE, // reasoncode + OCC_NO_EXTENDED_RC, // Extended reason code + ERRL_SEV_PREDICTIVE, // Severity + NULL, // Trace Buf + DEFAULT_TRACE_SIZE, // Trace Size + l_cent, // userdata1 + 0 // userdata2 + ); + + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &(l_centControlTask->gpe_req.ffdc), //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + //callout the centaur + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.centaur_huids[l_cent], + ERRL_CALLOUT_PRIORITY_MED); + + //callout the processor + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_MED); + + REQUEST_RESET(l_err); + } + + return; +} + + +// Function Specification +// +// Name: centaur_control_init +// +// Description: Do one-time setup for centaur control task +// +// +// +// Precondition: We must have determined the present centuars already +// +// Flow: 02/27/13 FN=init_task_centaur_control +// +// End Function Specification +void centaur_control_init( void ) +{ + errlHndl_t l_err = NULL; + int l_rc_gpe = 0; + centaur_mba_farb3qn_t l_mbafarbq; + + do + { + //initialize the active throttle limits + memset(G_centaurThrottleLimits, 0, sizeof(G_centaurThrottleLimits)); + + //Do one-time setup items for the task here. + + //-------------------------------------------------- + // Set up Centuar Regs + // [0]: for MBAFARBQ0 + // [1]: for MBAFARBQ1 + //-------------------------------------------------- + // + G_centaurThrottle[NM_THROTTLE_MBA01].scom = CENTAUR_MBA_FARB3Q0; + G_centaurThrottle[NM_THROTTLE_MBA23].scom = CENTAUR_MBA_FARB3Q1; + l_mbafarbq.value = 0; + l_mbafarbq.fields.cfg_nm_n_per_mba = -1; //all bits set + l_mbafarbq.fields.cfg_nm_n_per_chip = -1; //all bits set + G_centaurThrottle[NM_THROTTLE_MBA01].mask = l_mbafarbq.value; + G_centaurThrottle[NM_THROTTLE_MBA23].mask = l_mbafarbq.value; + + // Set up GPE parameters + G_centaur_control_reg_parms.rc = 0; + G_centaur_control_reg_parms.entries = 0; + G_centaur_control_reg_parms.scomList = (uint32_t) (&G_centaurThrottle[0]); + G_centaur_control_reg_parms.options = 0; + G_centaur_control_reg_parms.errorIndex = 0; + + //-------------------------------------------------- + // Initializes PoreFlex for Centaur Control Task, but + // doesn't actually run anything until RTL + //-------------------------------------------------- + l_rc_gpe = pore_flex_create( + &G_centaur_control_task.gpe_req, // gpe_req for the task + &G_pore_gpe1_queue, // queue + gpe_scom_centaur, // entry point + (uint32_t) &G_centaur_control_reg_parms, // parm for the task + SSX_WAIT_FOREVER, // gm023 + NULL, // callback + NULL, // callback argument + 0 ); // options + if(l_rc_gpe) + { + break; + } + + } + while(0); + + + if( l_rc_gpe ) + { + //If fail to create pore flex object then there is a problem. + TRAC_ERR("centaur_control_init: Failed to initialize centaur control task [l_rc_gpe=0x%x]", l_rc_gpe); + + /* @ + * @errortype + * @moduleid CENTAUR_INIT_MOD + * @reasoncode SSX_GENERIC_FAILURE + * @userdata1 l_rc_gpe - Return code of failing function + * @userdata2 0 + * @userdata4 ERC_CENTAUR_PORE_FLEX_CREATE_FAILURE + * @devdesc Failed to initialize GPE routine + */ + l_err = createErrl( + CENTAUR_INIT_MOD, //modId + SSX_GENERIC_FAILURE, //reasoncode + ERC_CENTAUR_PORE_FLEX_CREATE_FAILURE, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + l_rc_gpe, //userdata1 + 0 //userdata2 + ); + + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &G_centaur_control_pore_req.ffdc, //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + REQUEST_RESET(l_err); //@gm006 + } + + return; +} + + + + diff --git a/src/occ/cent/centaur_control.h b/src/occ/cent/centaur_control.h new file mode 100755 index 0000000..3fae595 --- /dev/null +++ b/src/occ/cent/centaur_control.h @@ -0,0 +1,100 @@ +/****************************************************************************** +// @file centaur_control.h +// @brief Centaur external control functions & data. +*/ +/****************************************************************************** + * + * @page ChangeLogs Change Logs + * @section _centaur_control_h centaur_control.h + * @verbatim + * + * Flag Def/Fea Userid Date Description + * ------- ---------- -------- ---------- ---------------------------------- + * @th031 878471 thallet 04/15/2013 Centaur Throttles + * @gm004 892961 milesg 07/25/2013 Removed centaur_control_init from init section + * @gm016 909061 milesg 12/10/2013 Support memory throttling due to temperature + * + * @endverbatim + * + *///*************************************************************************/ + +#ifndef _CENTAUR_CONTROL_H +#define _CENTAUR_CONTROL_H + +//************************************************************************* +// Includes +//************************************************************************* +#include <occ_common.h> +#include <ssx.h> +#include "rtls.h" +#include "gpe_data.h" +#include "occ_sys_config.h" + +//************************************************************************* +// Externs +//************************************************************************* + +//************************************************************************* +// Defines/Enums +//************************************************************************* + +//************************************************************************* +// Macros +//************************************************************************* + +//************************************************************************* +// Structures +//************************************************************************* + +typedef enum +{ + CENTAUR_CONTROL_GPE_STILL_RUNNING = 0x01, + CENTAUR_CONTROL_RESERVED_1 = 0x02, + CENTAUR_CONTROL_RESERVED_2 = 0x04, + CENTAUR_CONTROL_RESERVED_3 = 0x08, + CENTAUR_CONTROL_RESERVED_4 = 0x10, + CENTAUR_CONTROL_RESERVED_5 = 0x20, + CENTAUR_CONTROL_RESERVED_6 = 0x40, + CENTAUR_CONTROL_RESERVED_7 = 0x80, +} eCentaurControlTraceFlags; + +//Centaur data collect structures used for task data pointers +struct centaur_control_task { + uint8_t startCentaur; + uint8_t prevCentaur; + uint8_t curCentaur; + uint8_t endCentaur; + uint8_t traceThresholdFlags; + PoreFlex gpe_req; +} __attribute__ ((__packed__)); +typedef struct centaur_control_task centaur_control_task_t; + +//per mba throttle values -- gm016 +typedef struct +{ + uint16_t max_n_per_mba; //mode and OVS dependent, from config data + uint16_t max_n_per_chip; //mode and OVS dependent, from config data + uint16_t min_n_per_mba; //from config data + uint16_t m; +} centaur_throttle_t; + +//************************************************************************* +// Globals +//************************************************************************* + +//Global centaur structures used for task data pointers +extern centaur_control_task_t G_centaur_control_task; + +//************************************************************************* +// Function Prototypes +//************************************************************************* + +//Collect centaur data for all centaur in specified range +void task_centaur_control( task_t * i_task ); + +//Initialize structures for collecting centaur data. +//void centaur_control_init( void ) INIT_SECTION; +void centaur_control_init( void ); //gm004 + +#endif //_CENTAUR_CONTROL_H + diff --git a/src/occ/cent/centaur_data.c b/src/occ/cent/centaur_data.c new file mode 100755 index 0000000..64181ad --- /dev/null +++ b/src/occ/cent/centaur_data.c @@ -0,0 +1,1194 @@ +/****************************************************************************** +// @file centaur_data.c +// @brief Collection & Initialization of Centaur Chips/Procedures/Data. +*/ +/****************************************************************************** + * + * @page ChangeLogs Change Logs + * @section _centaur_data_c centaur_data.c + * @verbatim + * + * Flag Def/Fea Userid Date Description + * ------- ---------- -------- ---------- ---------------------------------- + * @th00c thallet 03/14/2012 Created + * @nh001 neilhsu 05/23/2012 Add missing error log tags + * @th012 thallet 07/17/2012 Changed centaur procedure to get + * MCS BaseAddress automatically + * @th013 thallet 07/17/2012 SW150201: OCC VPO: inbound scom + * @th016 thallet 08/13/2012 Change tor only 4 Centaurs possible on Murano + * @th018 852950 thallet 09/12/2012 Added Centaur thermal readings + * @th031 878471 thallet 04/15/2013 Centaur Throttles + * @th045 893135 thallet 07/26/2013 Updated for new Centaur Procedures + * @gm006 SW224414 milesg 09/16/2013 Reset and FFDC improvements + * @rt001 901927 tapiar 10/03/2013 Fix src tags + * @gm012 905097 milesg 10/31/2013 Fix Centaur enablement + * @gm013 907548 milesg 11/22/2013 Memory therm monitoring support + * @gm015 907601 milesg 12/06/2013 L4 Bank Delete circumvention and centaur i2c recovery + * @gm016 909061 milesg 12/10/2013 Support memory throttling due to temperature + * @gm019 910509 milesg 01/10/2014 Memory corruption problem + * @gm022 908890 milesg 01/23/2014 Enable centaur deadman timer + * @gm023 913865 milesg 01/31/2014 Centaur GPE times out + * @gm039 922963 milesg 05/28/2014 Handle centaur nest LFIR 6 + * @gm041 928150 milesg 06/02/2014 log error in mfg if centaur lfir[6] bit is set + * @gm042 917016 milesg 06/04/2014 add delay between centaur i2c recovery operations + * @gs038 935559 gjsilva 08/26/2014 Support new RCs from gpe_get_mem_data procedure + * + * @endverbatim + * + *///*************************************************************************/ + +//************************************************************************* +// Includes +//************************************************************************* +#include "centaur_data.h" +#include "centaur_control.h" +#include "pgp_async.h" +#include "threadSch.h" +#include "pmc_register_addresses.h" +#include "centaur_data_service_codes.h" +#include "occ_service_codes.h" +#include "errl.h" +#include "trac.h" +#include "rtls.h" +#include "apss.h" +#include "state.h" +#include "gpe_scom.h" +#include "centaur_firmware_registers.h" +#include "centaur_register_addresses.h" + +//************************************************************************* +// Externs +//************************************************************************* + +//************************************************************************* +// Macros +//************************************************************************* + +//************************************************************************* +// Defines/Enums +//************************************************************************* + +// Enumerated list of possible centaur operations -- gm039 +typedef enum +{ + L4_LINE_DELETE, + READ_NEST_LFIR6, + READ_THERM_STATUS, + RESET_DTS_FSM, + DISABLE_SC, + CLEAR_NEST_LFIR6, + ENABLE_SC, + NUM_CENT_OPS +}cent_ops_enum; + + +#define MBCCFGQ_REG ((uint32_t)0x0201140ful) +#define LINE_DELETE_ON_NEXT_CE ((uint64_t)0x0080000000000000ull) +#define SCAC_CONFIG_REG ((uint32_t)0x020115ceul) +#define SCAC_MASTER_ENABLE ((uint64_t)0x8000000000000000ull) +#define CENT_NEST_LFIR_REG ((uint32_t)0x0204000aul) +#define CENT_NEST_LFIR_AND_REG ((uint32_t)0x0204000bul) +#define CENT_NEST_LFIR6 ((uint64_t)0x0200000000000000ull) +#define CENT_THRM_CTRL_REG ((uint32_t)0x02050012ul) +#define CENT_THRM_CTRL4 ((uint64_t)0x0800000000000000ull) +#define CENT_THRM_STATUS_REG ((uint32_t)0x02050013ul) +#define CENT_THRM_PARITY_ERROR26 ((uint64_t)0x0000002000000000ull) +#define CENT_MAX_DEADMAN_TIMER 0xf +#define CENT_DEADMAN_TIMER_2SEC 0x8 +//************************************************************************* +// Structures +//************************************************************************* + +//************************************************************************* +// Globals +//************************************************************************* +//Global array of centaur data buffers +GPE_BUFFER(MemData G_centaur_data[NUM_CENTAUR_DATA_BUFF + + NUM_CENTAUR_DOUBLE_BUF + + NUM_CENTAUR_DATA_EMPTY_BUF]); + +//pore request for scoming centaur registers +PoreFlex G_cent_scom_req; + +//input/output parameters for gpe_scom_centaur() +GPE_BUFFER(GpeScomParms G_cent_scom_gpe_parms); + +//scom command list entry +GPE_BUFFER(scomList_t G_cent_scom_list_entry[NUM_CENT_OPS]); //gm039 + +//buffer for storing output from running gpe_scom_centaur() +GPE_BUFFER(uint64_t G_cent_scom_data[MAX_NUM_CENTAURS]) = {0}; + +cent_sensor_flags_t G_cent_enabled_sensors = {0}; + +//Global array of centaur data pointers +MemData * G_centaur_data_ptrs[MAX_NUM_CENTAURS] = { &G_centaur_data[0], + &G_centaur_data[1], &G_centaur_data[2], &G_centaur_data[3], + &G_centaur_data[4], &G_centaur_data[5], &G_centaur_data[6], + &G_centaur_data[7]}; + +//Global structures for gpe get mem data parms +GPE_BUFFER(GpeGetMemDataParms G_centaur_data_parms); + +//Pore flex request for the GPE job that is used for centaur init. +PoreFlex G_centaur_reg_pore_req; + +//Centaur structures used for task data pointers. +centaur_data_task_t G_centaur_data_task = { + .start_centaur = 0, + .current_centaur = 0, + .end_centaur = 7, + .prev_centaur = 7, + .centaur_data_ptr = &G_centaur_data[8] +}; + +//AMEC needs to know when data for a centaur has been collected. +uint32_t G_updated_centaur_mask = 0; + +//Global G_present_centaurs is bitmask of all centaurs +//(1 = present, 0 = not present. Core 0 has the most significant bit) +uint32_t G_present_centaurs = 0; + +// Latch for a Trace Entry +uint8_t G_centaur_queue_not_idle_traced = 0; + +// bitmap of centaurs requiring i2c recovery -- gm015 +uint8_t G_centaur_needs_recovery = 0; + +// bitmap of centaurs that have NEST LFIR6 asserted... +// This tells amec code to treat the centaur temperature as invalid +uint8_t G_centaur_nest_lfir6 = 0; //gm039 + +//************************************************************************* +// Function Prototypes +//************************************************************************* + +//************************************************************************* +// Functions +//************************************************************************* + +// Function Specification +// +// Name: cent_recovery +// +// Description: i2c recovery procedure and other hw workarounds +// +// Flow: ??? FN= ??? +// +// End Function Specification + +//number of times in a row we must go without needing i2c recovery +//before we declare success and allow tracing again +#define I2C_REC_TRC_THROT_COUNT 8 + +//threshold of times LFIR6 is asserted (up/down counter) before tracing +#define NEST_LFIR6_MAX_COUNT 4 + +//number of SC polls to wait between i2c recovery attempts (see SW248695) -- gm042 +#define CENT_SC_MAX_INTERVAL 32 + +void cent_recovery(uint32_t i_cent) //gm015 +{ + int l_rc = 0; + errlHndl_t l_err = NULL; + uint32_t l_prev_cent = G_cent_scom_list_entry[L4_LINE_DELETE].instanceNumber; //gm039 + uint8_t l_cent_mask = CENTAUR0_PRESENT_MASK >> l_prev_cent; //gm039 + static bool L_not_idle_traced = FALSE; + static uint8_t L_cent_callouts = 0; + static bool L_gpe_scheduled = FALSE; + static uint8_t L_i2c_rec_trc_throt = 0; + static bool L_gpe_had_1_tick = FALSE; + static uint8_t L_nest_lfir6_count[MAX_NUM_CENTAURS] = {0}; + static uint8_t L_nest_lfir6_traced = 0; + static uint8_t L_nest_lfir6_logged = 0; //gm041 + static uint8_t L_i2c_recovery_delay[MAX_NUM_CENTAURS] = {0}; //gm042 + + do + { + //First, check to see if the previous GPE request is still running. + //A request is considered idle if it is not attached to any of the + //asynchronous request queues. + if( !(async_request_is_idle(&G_cent_scom_req.request)) ) + { + //This can happen due to variability in when the task runs from + //one tick to another. Only trace if it has had a full tick. + if(!L_not_idle_traced && L_gpe_had_1_tick) + { + TRAC_INFO("cent_recovery: Centaur recovery GPE is still running. cent[%d], entries[%d], state[0x%08x]", + l_prev_cent, //gm039 + G_cent_scom_gpe_parms.entries, + G_cent_scom_req.request.state); + L_not_idle_traced = TRUE; + } + L_gpe_had_1_tick = TRUE; + break; + } + else + { + //Request is idle + L_gpe_had_1_tick = FALSE; + //allow the trace again if it's idle + if(L_not_idle_traced) + { + TRAC_INFO("cent_recovery: GPE completed. cent[%d],", + l_prev_cent); //gm039 + L_not_idle_traced = FALSE; + } + } + + //Request completed + + //Check for failure and log an error if we haven't already logged one for this centaur + //but keep retrying. + if(L_gpe_scheduled && + (!async_request_completed(&G_cent_scom_req.request) || G_cent_scom_gpe_parms.rc) && + (!(L_cent_callouts & l_cent_mask))) //gm039 + { + //Mark the centaur as being called out + L_cent_callouts |= l_cent_mask; //gm039 + + // There was an error doing the recovery scoms + TRAC_ERR("cent_recovery: gpe_scom_centaur failed. rc[0x%08x] cent[%d] entries[%d] errorIndex[0x%08X]", + G_cent_scom_gpe_parms.rc, + l_prev_cent, //gm039 + G_cent_scom_gpe_parms.entries, + G_cent_scom_gpe_parms.errorIndex); + + /* @ + * @errortype + * @moduleid CENT_RECOVERY_MOD + * @reasoncode CENT_SCOM_ERROR + * @userdata1 rc - Return code of failing scom + * @userdata2 index of failing scom + * @userdata4 0 + * @devdesc OCC to Centaur communication failure + */ + l_err = createErrl( + CENT_RECOVERY_MOD, //modId + CENT_SCOM_ERROR, //reasoncode + OCC_NO_EXTENDED_RC, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + G_cent_scom_gpe_parms.rc, //userdata1 + G_cent_scom_gpe_parms.errorIndex //userdata2 + ); + + //dump ffdc contents collected by ssx + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &(G_cent_scom_req.ffdc), //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + //callout the centaur + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.centaur_huids[l_prev_cent], //gm039 + ERRL_CALLOUT_PRIORITY_MED); + + //callout the processor + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_MED); + commitErrl(&l_err); + } + +#if 0 //set this to 1 for testing hard failures + if(CURRENT_TICK > 120000) + { + G_cent_scom_list_entry[READ_NEST_LFIR6].data = CENT_NEST_LFIR6; + } +#endif + + // check the centaur nest lfir register for parity errors from thermal (bit 6) + // NOTE: recovery will occur 8 ticks from now so that all entries target the + // same centaur in a given tick (simplifies callouts) -- gm039 + if(G_cent_scom_list_entry[READ_NEST_LFIR6].data & CENT_NEST_LFIR6) + { + //Increment the per-centaur LFIR[6] threshold counter + if(L_nest_lfir6_count[l_prev_cent] < NEST_LFIR6_MAX_COUNT) + { + L_nest_lfir6_count[l_prev_cent]++; + + //log an error the first time we see this. Error will be predictive + //if mfg mode ipl and informational otherwise -- gm041 + if(!(L_nest_lfir6_logged & l_cent_mask)) + { + //only log error once + L_nest_lfir6_logged |= l_cent_mask; + + TRAC_ERR("cent_recovery: NEST LFIR[6] was asserted on cent[%d] lfir[%08x%08x], thrm_stat[%08x%08x]", + l_prev_cent, + (uint32_t)(G_cent_scom_list_entry[READ_NEST_LFIR6].data >> 32), + (uint32_t)(G_cent_scom_list_entry[READ_NEST_LFIR6].data), + (uint32_t)(G_cent_scom_list_entry[READ_THERM_STATUS].data >> 32), + (uint32_t)(G_cent_scom_list_entry[READ_THERM_STATUS].data)); + + //only log the error if the thermal parity error is being masked (per Mike Pardeik) + if(G_cent_scom_list_entry[READ_THERM_STATUS].data & CENT_THRM_PARITY_ERROR26) + { + /* @ + * @errortype + * @moduleid CENT_RECOVERY_MOD + * @reasoncode CENT_LFIR_ERROR + * @userdata1 0 + * @userdata2 0 + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc Centaur has an unexpected FIR bit set + */ + l_err = createErrl( + CENT_RECOVERY_MOD, //modId + CENT_LFIR_ERROR, //reasoncode + OCC_NO_EXTENDED_RC, //Extended reason code + ERRL_SEV_INFORMATIONAL, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + 0, //userdata1 + 0); //userdata2 + + //force severity to predictive if mfg ipl (allows callout to be added to info error) + setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR); + + //add centaur callout + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.centaur_huids[l_prev_cent], + ERRL_CALLOUT_PRIORITY_HIGH); + + commitErrl(&l_err); + } + } + + //Trace if it looks like a hard failure (error will be logged if we time out later) + if((L_nest_lfir6_count[l_prev_cent] == NEST_LFIR6_MAX_COUNT) && + !(L_nest_lfir6_traced & l_cent_mask)) + { + //one-time trace of hitting the threshold + TRAC_IMP("cent_recovery: NEST LFIR[6] count reached thresh[%d]. cent[%d] scom[%08x%08x]", + NEST_LFIR6_MAX_COUNT, + l_prev_cent, + (uint32_t)(G_cent_scom_list_entry[READ_NEST_LFIR6].data >> 32), + (uint32_t)(G_cent_scom_list_entry[READ_NEST_LFIR6].data)); + L_nest_lfir6_traced |= l_cent_mask; + } + } + + G_centaur_nest_lfir6 |= l_cent_mask; + } + else + { + //decrement the per-centaur LFIR[6] threshold counter + if(L_nest_lfir6_count[l_prev_cent] > 0) + { + L_nest_lfir6_count[l_prev_cent]--; + } + + G_centaur_nest_lfir6 &= ~l_cent_mask; + } + + //Now we can start working on the next centaur (i_cent) -- gm039 + l_cent_mask = CENTAUR0_PRESENT_MASK >> i_cent; + + //reset for next pass + L_gpe_scheduled = FALSE; + + //check if this centaur requires lfir6 recovery -- gm039 + if(G_centaur_nest_lfir6 & l_cent_mask) + { + //Set the command type from GPE_SCOM_NOP to GPE_SCOM_RMW + //these entries will reset the centaur DTS FSM and clear LFIR 6 + //if recovery worked, LFIR 6 should remain cleared. + G_cent_scom_list_entry[RESET_DTS_FSM].commandType = GPE_SCOM_WRITE; + G_cent_scom_list_entry[CLEAR_NEST_LFIR6].commandType = GPE_SCOM_WRITE; + } + else + { + //these ops aren't needed so disable them + G_cent_scom_list_entry[RESET_DTS_FSM].commandType = GPE_SCOM_NOP; + G_cent_scom_list_entry[CLEAR_NEST_LFIR6].commandType = GPE_SCOM_NOP; + } + + //Decrement the delay counter for centaur i2c recovery -- gm042 + if(L_i2c_recovery_delay[i_cent]) + { + L_i2c_recovery_delay[i_cent]--; + } + //check if this centaur requires i2c recovery (dimm sensor has error status bit set) + if(G_centaur_needs_recovery & l_cent_mask) //gm039 + { + //If the delay time is up, do the i2c recovery -- gm042 + if(!L_i2c_recovery_delay[i_cent]) + { + if(!L_i2c_rec_trc_throt) + { + TRAC_INFO("cent_recovery: Performing centaur i2c recovery procedure. required bitmap = 0x%02X", G_centaur_needs_recovery); + } + + //restart the recovery delay -- gm042 + L_i2c_recovery_delay[i_cent] = CENT_SC_MAX_INTERVAL; + + //don't allow tracing for at least I2C_REC_TRC_THROT_COUNT calls to this function + //where we didn't require i2c recovery + L_i2c_rec_trc_throt = I2C_REC_TRC_THROT_COUNT; + + //clear the request for i2c recovery here + G_centaur_needs_recovery &= ~l_cent_mask; //gm039 + + //Set the command type from GPE_SCOM_NOP to GPE_SCOM_RMW + //these entries will disable and re-enable the centuar sensor cache + //which will also cause the i2c master to be reset -- gm039 + G_cent_scom_list_entry[DISABLE_SC].commandType = GPE_SCOM_RMW; + G_cent_scom_list_entry[ENABLE_SC].commandType = GPE_SCOM_RMW; + } + } + else + { + //Centaur didn't require i2c recovery so decrement the throttle count if + //not already 0. + if(L_i2c_rec_trc_throt) + { + L_i2c_rec_trc_throt--; + + //Trace on transition from 1 to 0 only + if(!L_i2c_rec_trc_throt) + { + TRAC_INFO("cent_recovery: Centaur i2c recovered on all present centaurs"); + } + } + + //these ops aren't needed so disable them -- gm039 + G_cent_scom_list_entry[DISABLE_SC].commandType = GPE_SCOM_NOP; + G_cent_scom_list_entry[ENABLE_SC].commandType = GPE_SCOM_NOP; + } + + //Set the target centaur for all ops + G_cent_scom_list_entry[L4_LINE_DELETE].instanceNumber = i_cent; + G_cent_scom_list_entry[READ_NEST_LFIR6].instanceNumber = i_cent; + G_cent_scom_list_entry[READ_THERM_STATUS].instanceNumber = i_cent; + G_cent_scom_list_entry[RESET_DTS_FSM].instanceNumber = i_cent; + G_cent_scom_list_entry[CLEAR_NEST_LFIR6].instanceNumber = i_cent; + G_cent_scom_list_entry[DISABLE_SC].instanceNumber = i_cent; + G_cent_scom_list_entry[ENABLE_SC].instanceNumber = i_cent; + + // Set up GPE parameters + G_cent_scom_gpe_parms.rc = 0; + G_cent_scom_gpe_parms.entries = NUM_CENT_OPS; //gm039 + G_cent_scom_gpe_parms.scomList = (uint32_t) (&G_cent_scom_list_entry[0]); + G_cent_scom_gpe_parms.options = 0; + G_cent_scom_gpe_parms.errorIndex = 0; + + + // Submit Pore GPE without blocking + l_rc = pore_flex_schedule(&G_cent_scom_req); + if(l_rc) + { + TRAC_ERR("cent_recovery: pore_flex_schedule failed. rc = 0x%08x", l_rc); + /* @ + * @errortype + * @moduleid CENT_RECOVERY_MOD + * @reasoncode SSX_GENERIC_FAILURE + * @userdata1 rc - Return code of failing function + * @userdata2 0 + * @userdata4 0 + * @devdesc Internal failure (code bug) + */ + l_err = createErrl( + CENT_RECOVERY_MOD, //modId + SSX_GENERIC_FAILURE, //reasoncode + OCC_NO_EXTENDED_RC, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + l_rc, //userdata1 + 0 //userdata2 + ); + + REQUEST_RESET(l_err); + break; + } + L_gpe_scheduled = TRUE; + + }while(0); +} + +// Function Specification +// +// Name: task_centaur_data +// +// Description: Collect centaur data. The task is used for centaur data +// collection +// +// Flow: 03/17/12 FN=task_centaur_data +// +// End Function Specification +void task_centaur_data( task_t * i_task ) +{ + errlHndl_t l_err = NULL; // Error handler + int rc = 0; // Return code + MemData * l_temp = NULL; + centaur_data_task_t * l_centaur_data_ptr = (centaur_data_task_t *)i_task->data_ptr; + GpeGetMemDataParms * l_parms = (GpeGetMemDataParms *)(l_centaur_data_ptr->gpe_req.parameter); + static bool L_gpe_scheduled = FALSE; + static bool L_gpe_error_logged = FALSE; + static bool L_gpe_had_1_tick = FALSE; + + do + { + // ------------------------------------------ + // Centaur Data Task Variable Initial State + // ------------------------------------------ + // ->current_centaur: the one that was just 'written' to last tick to + // kick off the sensor cache population in the + // centaur. It will be 'read from' during this tick. + // + // ->prev_centaur: the one that was 'read from' during the last tick + // and will be used to update the + // G_updated_centaur_mask during this tick. + // + // ->centaur_data_ptr: points to G_centaur_data_ptrs[] for + // the centaur that is referenced by prev_centaur + // (the one that was just 'read') + + //First, check to see if the previous GPE request still running + //A request is considered idle if it is not attached to any of the + //asynchronous request queues + if( !(async_request_is_idle(&l_centaur_data_ptr->gpe_req.request)) ) + { + //This may happen due to variability in the time that this + //task runs. Don't trace on the first occurrence. + if( !G_centaur_queue_not_idle_traced && L_gpe_had_1_tick) + { + TRAC_INFO("task_centaur_data: GPE is still running"); + G_centaur_queue_not_idle_traced = TRUE; + } + L_gpe_had_1_tick = TRUE; + break; + } + else + { + //Request is idle + L_gpe_had_1_tick = FALSE; + if( G_centaur_queue_not_idle_traced) + { + TRAC_INFO("task_centaur_data: GPE completed"); + G_centaur_queue_not_idle_traced = FALSE; + } + } + + //Need to complete collecting data for all assigned centaurs from + //previous interval and tick 0 is the current tick before collect data again. + if( (l_centaur_data_ptr->current_centaur == l_centaur_data_ptr->end_centaur) + && ((CURRENT_TICK & (MAX_NUM_TICKS - 1)) != 0) ) + { + CENT_DBG("Did not collect centaur data. Need to wait for tick."); + break; + } + + //Check to see if the previous GPE request has succeeded. + //A request is not considered complete until both the engine job + //has finshed without error and any callback has run to completion. + if(L_gpe_scheduled) + { + //Per Bishop, If the request is idle but not completed then there was an error + //(as long as the request was scheduled). + if(!async_request_completed(&l_centaur_data_ptr->gpe_req.request) || l_parms->rc ) + { + //log an error the first time this happens but keep on truckin. + //eventually, we will timeout on the dimm & centaur temps not being updated + //and fans will go to max speed (probably won't be able to throttle for + //same reason we can't access the centaur here). + if(!L_gpe_error_logged) + { + L_gpe_error_logged = TRUE; + + // There was an error collecting the centaur sensor cache + TRAC_ERR("task_centaur_data: gpe_get_mem_data failed. rc=0x%08x%08x, cur=%d, prev=%d", + (uint32_t)(l_parms->rc >> 32), + (uint32_t)(l_parms->rc), + l_centaur_data_ptr->current_centaur, + l_centaur_data_ptr->prev_centaur); + /* @ + * @errortype + * @moduleid CENT_TASK_DATA_MOD + * @reasoncode CENT_SCOM_ERROR + * @userdata1 l_parms->rc + * @userdata2 0 + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc Failed to get centaur data + */ + l_err = createErrl( + CENT_TASK_DATA_MOD, //modId + CENT_SCOM_ERROR, //reasoncode + OCC_NO_EXTENDED_RC, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + l_parms->rc, //userdata1 + 0 //userdata2 + ); + + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &(l_centaur_data_ptr->gpe_req.ffdc), //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + //Callouts depend on the return code of the gpe_get_mem_data procedure + if(l_parms->rc == GPE_GET_MEM_DATA_DIED) + { + //callout the processor + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_LOW); + } + else if(l_parms->rc == GPE_GET_MEM_DATA_SENSOR_CACHE_FAILED) + { + //callout the previous centaur if present + if(CENTAUR_PRESENT(l_centaur_data_ptr->prev_centaur)) + { + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.centaur_huids[l_centaur_data_ptr->prev_centaur], + ERRL_CALLOUT_PRIORITY_HIGH); + } + + //callout the processor + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_LOW); + } + else if(l_parms->rc == GPE_GET_MEM_DATA_UPDATE_FAILED) + { + //callout the current centaur if present + if(CENTAUR_PRESENT(l_centaur_data_ptr->current_centaur)) + { + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.centaur_huids[l_centaur_data_ptr->current_centaur], + ERRL_CALLOUT_PRIORITY_HIGH); + } + + //callout the processor + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_LOW); + } + else + { + //callout the firmware + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_COMPONENT_ID, + ERRL_COMPONENT_ID_FIRMWARE, + ERRL_CALLOUT_PRIORITY_MED); + } + + commitErrl(&l_err); + } + + } + else + { + //If the previous GPE request succeeded then swap l_centaur_data_ptr + //with the global one. The gpe routine will write new data into + //a buffer that is not being accessed by the RTLoop code. + l_temp = l_centaur_data_ptr->centaur_data_ptr; + l_centaur_data_ptr->centaur_data_ptr = + G_centaur_data_ptrs[l_centaur_data_ptr->current_centaur]; + G_centaur_data_ptrs[l_centaur_data_ptr->prev_centaur] = l_temp; // @th013 + + //Centaur data has been collected so set the bit in global mask. + //AMEC code will know which centaur to update sensors for. AMEC is + //reponsible for clearing the bit later on. + // prev centaur is the one that was just 'read from' in the last tick + if( CENTAUR_PRESENT(l_centaur_data_ptr->prev_centaur) ) // @th013 + { + G_updated_centaur_mask |= CENTAUR_BY_MASK(l_centaur_data_ptr->prev_centaur); + } + } + }//if(L_gpe_scheduled) + + // If the centaur is not present, then we need to point to the empty G_centaur_data + // so that we don't use old/stale data from a leftover G_centaur_data + // (this is very handy for debug...) + if( !CENTAUR_PRESENT(l_centaur_data_ptr->current_centaur)) + { + G_centaur_data_ptrs[l_centaur_data_ptr->current_centaur] = &G_centaur_data[9]; + } + + //Update current centaur + if ( l_centaur_data_ptr->current_centaur >= l_centaur_data_ptr->end_centaur ) + { + l_centaur_data_ptr->prev_centaur = l_centaur_data_ptr->current_centaur; // @th013 + l_centaur_data_ptr->current_centaur = l_centaur_data_ptr->start_centaur; + } + else + { + l_centaur_data_ptr->prev_centaur = l_centaur_data_ptr->current_centaur; // @th013 + l_centaur_data_ptr->current_centaur++; + } + + // ------------------------------------------ + // Centaur Data Task Variable State Changed + // ------------------------------------------ + // ->current_centaur: the one that will be 'written' to in order to + // kick off the sensor cache population in the + // centaur. + // + // ->prev_centaur: the one that will be 'read from', meaning have + // the sensor cache transferred from the Centaur + // to l_centaur_data_ptr->centaur_data_ptr + // + // ->centaur_data_ptr: points to G_centaur_data_ptrs[] for + // the centaur that is referenced by prev_centaur + // (the one that will be 'read') + + //If centaur is not present then skip it. This task assigned to this centaur will + //be idle during this time it would have collected the data. + if( CENTAUR_PRESENT(l_centaur_data_ptr->current_centaur) + || CENTAUR_PRESENT(l_centaur_data_ptr->prev_centaur) ) // @th013 + { + // Setup the 'get centaur data' parms + // ->config controls which Centaur we are reading from + if( CENTAUR_PRESENT(l_centaur_data_ptr->prev_centaur) ){ // @th013 + // If prev centaur is present, do the read of the sensor cache + l_parms->collect = l_centaur_data_ptr->prev_centaur; + } + else{ + // If prev centaur is not present, don't do the read of the sensor cache. + l_parms->collect = -1; + } + + // ->config_update controls which Centaur we are writing to + if( CENTAUR_PRESENT(l_centaur_data_ptr->current_centaur) ){ // @th013 + // If cur centaur is present, do the write to kick off the sensor cache collect + l_parms->update = l_centaur_data_ptr->current_centaur; + } + else{ + // If cur centaur is not present, don't do the write to kick off the sensor cache collect + l_parms->update = -1; + } + + l_parms->data = (uint32_t) l_centaur_data_ptr->centaur_data_ptr; + l_parms->rc = 0; + + // Pore flex schedule gpe_get_mem_data + // Check pore_flex_schedule return code if error + // then request OCC reset. + rc = pore_flex_schedule( &(l_centaur_data_ptr->gpe_req) ); + if(rc) + { + TRAC_ERR("task_centaur_data: pore_flex_schedule failed for centaur data collection. rc=%d", rc); + /* @ + * @errortype + * @moduleid CENT_TASK_DATA_MOD + * @reasoncode SSX_GENERIC_FAILURE + * @userdata1 rc - Return code of failing function + * @userdata2 0 + * @userdata4 ERC_CENTAUR_PORE_FLEX_SCHEDULE_FAILURE + * @devdesc Failed to get centaur data + */ + l_err = createErrl( + CENT_TASK_DATA_MOD, //modId + SSX_GENERIC_FAILURE, //reasoncode + ERC_CENTAUR_PORE_FLEX_SCHEDULE_FAILURE, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + rc, //userdata1 + l_parms->rc //userdata2 + ); + + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &(l_centaur_data_ptr->gpe_req.ffdc), //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + REQUEST_RESET(l_err); //this will add firmware callout + break; + } + + L_gpe_scheduled = TRUE; + } + + } while(0); + + //handle centaur i2c recovery requests and centaur workaround -- gm015 + if(CENTAUR_PRESENT(l_centaur_data_ptr->current_centaur)) + { + cent_recovery(l_centaur_data_ptr->current_centaur); + } + return; +} + +#define CENTAUR_SENSCACHE_ENABLE 0x020115CC +// Function Specification +// +// Name: cent_get_enabled_sensors +// +// Description: Reads +// +// Flow: ??? FN= ??? +// +// End Function Specification +int cent_get_enabled_sensors() +{ + int l_rc = 0; + unsigned int l_cent; + + do + { + // Set up scom list entry (there's only 1) + G_cent_scom_list_entry[0].scom = CENTAUR_SENSCACHE_ENABLE; //scom address + G_cent_scom_list_entry[0].commandType = GPE_SCOM_READ_VECTOR; //scom operation to perform + G_cent_scom_list_entry[0].instanceNumber = 0; //Ignored for READ_VECTOR operation + G_cent_scom_list_entry[0].pData = (uint64_t *) G_cent_scom_data; //scom data will be stored here + + // Set up GPE parameters + G_cent_scom_gpe_parms.rc = 0; + G_cent_scom_gpe_parms.entries = 1; + G_cent_scom_gpe_parms.scomList = (uint32_t) (&G_cent_scom_list_entry[0]); + G_cent_scom_gpe_parms.options = 0; + G_cent_scom_gpe_parms.errorIndex = 0; + + //Initializes PoreFlex + l_rc = pore_flex_create( &G_cent_scom_req, // gpe_req for the task + &G_pore_gpe1_queue, // queue + gpe_scom_centaur, // entry point + (uint32_t) &G_cent_scom_gpe_parms, // parm for the task + SSX_SECONDS(2), // timeout + NULL, // callback + NULL, // callback argument + ASYNC_REQUEST_BLOCKING ); // options + if(l_rc) + { + TRAC_ERR("cent_get_enabled_sensors: pore_flex_create failed. rc = 0x%08x", l_rc); + break; + } + + // Submit Pore GPE and wait for completion + l_rc = pore_flex_schedule(&G_cent_scom_req); + if(l_rc) + { + TRAC_ERR("cent_get_enabled_sensors: pore_flex_schedule failed. rc = 0x%08x", l_rc); + break; + } + + //consolidate scom data into a smaller, cacheable 8 byte buffer + for(l_cent = 0; l_cent < MAX_NUM_CENTAURS; l_cent++) + { + G_cent_enabled_sensors.bytes[l_cent] = ((uint8_t*)(&G_cent_scom_data[l_cent]))[0]; + } + + TRAC_IMP("bitmap of enabled dimm temperature sensors: 0x%08X %08X", + G_cent_enabled_sensors.words[0], + G_cent_enabled_sensors.words[1]); + }while(0); + return l_rc; +} + +// Function Specification +// +// Name: centaur_init +// +// Description: Initialize procedures for collecting centaur data. It +// needs to be run in occ main and before RTLoop started. +// This will also initialize the centaur watchdog. +// +// Flow: 03/17/12 FN=centaur_init +// +// End Function Specification +void centaur_init( void ) +{ + errlHndl_t l_err = NULL; // Error handler + int rc = 0; // Return code + int l_jj = 0; // Indexer + static scomList_t L_scomList[2] SECTION_ATTRIBUTE(".noncacheable"); + static GpeScomParms L_centaur_reg_parms SECTION_ATTRIBUTE(".noncacheable"); + + do + { + /// Initialize Centaur & Centaur Data Structures + /// This needs to run before RTLoop starts as init needs to be + /// done before task to collect centaur data starts. + + TRAC_INFO("centaur_init: Initializing Centaur ... " ); + + /// Before anything else, we need to call this procedure to + /// determine which Centaurs are out there, their config info. + /// and Type/EC Level + + rc = centaur_configuration_create(); + if( rc ) + { + TRAC_ERR("centaur_init: Centaur Config Create failed with rc=0x%08x ", rc ); + break; + } + + /// Set up Centaurs present global variable for use by OCC + /// looping though the bitmask. + + G_present_centaurs = 0; + for(l_jj=0; l_jj<MAX_NUM_CENTAURS; l_jj++) + { + // Check if this centaur is even possible to be present + // by ANDing it against ALL_CENTAURS_MASK in this macro + + if( CENTAUR_BY_MASK(l_jj) ) // @th016 + { + if( G_centaurConfiguration.baseAddress[l_jj] ) + { + // G_cent_ba is != 0, so a valid Bar Address was found + // This means there is a VALID centaur there. + G_present_centaurs |= (CENTAUR0_PRESENT_MASK >> l_jj); + + // Trace out the CFAM Chip ID, which includes Type & EC + TRAC_INFO("centaur_init: Centaur[%d] Found, Chip Id=0x%08x",l_jj, mb_id(l_jj)); + } + } + } + + TRAC_IMP("centaur_init: G_present_centaurs = 0x%08x", G_present_centaurs); + + //initialize global bitmap of enabled centaur temperature sensors (for dimms) + rc = cent_get_enabled_sensors(); + + // Set up recovery scom list entries + G_cent_scom_list_entry[L4_LINE_DELETE].scom = MBCCFGQ_REG; //scom address + G_cent_scom_list_entry[L4_LINE_DELETE].commandType = GPE_SCOM_RMW; //scom operation to perform + G_cent_scom_list_entry[L4_LINE_DELETE].mask = LINE_DELETE_ON_NEXT_CE; //mask of bits to chagne + G_cent_scom_list_entry[L4_LINE_DELETE].data = LINE_DELETE_ON_NEXT_CE; //scom data (always set the bit) + + //one time init for reading LFIR6 -- gm039 + G_cent_scom_list_entry[READ_NEST_LFIR6].scom = CENT_NEST_LFIR_REG; //scom address + G_cent_scom_list_entry[READ_NEST_LFIR6].commandType = GPE_SCOM_READ; //scom operation to perform + G_cent_scom_list_entry[READ_NEST_LFIR6].mask = 0; //mask (not used for reads) + G_cent_scom_list_entry[READ_NEST_LFIR6].data = 0; //scom data (initialize to 0) + + //one time init for reading centaur thermal status register -- gm041 + G_cent_scom_list_entry[READ_THERM_STATUS].scom = CENT_THRM_STATUS_REG; //scom address + G_cent_scom_list_entry[READ_THERM_STATUS].commandType = GPE_SCOM_READ; //scom operation to perform + G_cent_scom_list_entry[READ_THERM_STATUS].mask = 0; //mask (not used for reads) + G_cent_scom_list_entry[READ_THERM_STATUS].data = 0; //scom data (initialize to 0) + + //one time init to reset the centaur dts FSM -- gm039 + G_cent_scom_list_entry[RESET_DTS_FSM].scom = CENT_THRM_CTRL_REG; //scom address + G_cent_scom_list_entry[RESET_DTS_FSM].commandType = GPE_SCOM_NOP; //init to no-op (only runs if needed) + G_cent_scom_list_entry[RESET_DTS_FSM].mask = 0; //mask (not used for writes) + G_cent_scom_list_entry[RESET_DTS_FSM].data = CENT_THRM_CTRL4; //scom data (sets bit4) + + //one time init to clear centaur NEST LFIR 6 -- gm039 + G_cent_scom_list_entry[CLEAR_NEST_LFIR6].scom = CENT_NEST_LFIR_AND_REG; //scom address + G_cent_scom_list_entry[CLEAR_NEST_LFIR6].commandType = GPE_SCOM_NOP; //init to no-op (only runs if needed) + G_cent_scom_list_entry[CLEAR_NEST_LFIR6].mask = 0; //mask (not used for writes) + G_cent_scom_list_entry[CLEAR_NEST_LFIR6].data = ~CENT_NEST_LFIR6; //scom data + + //one time init to disable centaur sensor cache + G_cent_scom_list_entry[DISABLE_SC].scom = SCAC_CONFIG_REG; //scom address + G_cent_scom_list_entry[DISABLE_SC].commandType = GPE_SCOM_NOP; //init to no-op (only runs if needed) + G_cent_scom_list_entry[DISABLE_SC].mask = SCAC_MASTER_ENABLE; //mask of bits to chagne + G_cent_scom_list_entry[DISABLE_SC].data = 0; //scom data (disable sensor cache) + + //one time init to enable centaur sensor cache + G_cent_scom_list_entry[ENABLE_SC].scom = SCAC_CONFIG_REG; //scom address + G_cent_scom_list_entry[ENABLE_SC].commandType = GPE_SCOM_NOP; //init to no-op (only runs if needed) + G_cent_scom_list_entry[ENABLE_SC].mask = SCAC_MASTER_ENABLE; //mask of bits to chagne + G_cent_scom_list_entry[ENABLE_SC].data = SCAC_MASTER_ENABLE; //scom data (enable sensor cache) + + + + /// Set up Centuar Scom Registers - array of Scoms + /// [0]: Setup deadman timer + /// NOTE: According to Irving Baysah, centaur spec is incorrect by a factor of 16. + /// max timeout is about 2 seconds. + + L_scomList[0].scom = CENTAUR_MBSCFGQ; + L_scomList[0].commandType = GPE_SCOM_RMW_ALL; + + centaur_mbscfgq_t l_mbscfg; + l_mbscfg.value = 0; + + //set up the mask bits + l_mbscfg.fields.occ_deadman_timer_sel = CENT_MAX_DEADMAN_TIMER; + L_scomList[0].mask = l_mbscfg.value; + + //set up the data bits + l_mbscfg.fields.occ_deadman_timer_sel = CENT_DEADMAN_TIMER_2SEC; + L_scomList[0].data = l_mbscfg.value; + + /// Set up Centuar Scom Registers - array of Scoms + /// [1]: clear the emergency throttle bit + + L_scomList[1].scom = CENTAUR_MBSEMERTHROQ; + L_scomList[1].commandType = GPE_SCOM_RMW_ALL; + + centaur_mbsemerthroq_t l_mbs_et; + l_mbs_et.value = 0; + + //set up the data + L_scomList[1].data = l_mbs_et.value; + + //set up the mask + l_mbs_et.fields.emergency_throttle_ip = 1; + L_scomList[1].mask = l_mbs_et.value; + + L_centaur_reg_parms.scomList = (uint32_t) (&L_scomList[0]); + L_centaur_reg_parms.entries = 2; + L_centaur_reg_parms.options = 0; + L_centaur_reg_parms.rc = 0; + L_centaur_reg_parms.errorIndex = 0; + + //Initialize PoreFlex + rc = pore_flex_create( &G_centaur_reg_pore_req, //gpe_req for the task + &G_pore_gpe1_queue, //queue + gpe_scom_centaur, //entry point + (uint32_t) &L_centaur_reg_parms, //parm for the task + SSX_SECONDS(5), //no timeout + NULL, //callback + NULL, //callback argument + ASYNC_REQUEST_BLOCKING ); //options + if(rc) + { + TRAC_ERR("centaur_init: pore_flex_create failed for G_centaur_reg_pore_req. rc = 0x%08x", rc); + break; + } + + // Submit Pore GPE and wait for completion + rc = pore_flex_schedule(&G_centaur_reg_pore_req); + + // Check for errors on Scom + if(rc || L_centaur_reg_parms.rc) + { + TRAC_ERR("centaur_init: gpe_scom_centaur failure. rc = 0x%08x, gpe_rc = 0x%08x, index = 0x%08x", + rc, + L_centaur_reg_parms.rc, + L_centaur_reg_parms.errorIndex); + if(!rc) + { + rc = L_centaur_reg_parms.rc; + } + break; + } + + /// Set up the OCC Centuar Data Collection Procedure + /// Includes initializing the centaur procedure parameters + /// to gather the 'centaur' data, but we will set them to + /// invalid (-1) util the task sets them up + + G_centaur_data_parms.rc = 0; + G_centaur_data_parms.collect = -1; + G_centaur_data_parms.update = -1; + G_centaur_data_parms.data = 0; + + //Initializes existing PoreFlex object for centaur data + rc = pore_flex_create( &G_centaur_data_task.gpe_req, //gpe_req for the task + &G_pore_gpe1_queue, //queue + gpe_get_mem_data, //entry point + (uint32_t) &G_centaur_data_parms, //parm for the task + SSX_WAIT_FOREVER, //gm023 + NULL, //callback + NULL, //callback argument + 0 ); //options + + if(rc) + { + TRAC_ERR("centaur_init: pore_flex_create failed for G_centaur_data_task.gpe_req. rc = 0x%08x", rc); + break; + } + + //Initialize existing PoreFlex object for centaur recovery + rc = pore_flex_create( &G_cent_scom_req, // gpe_req for the task + &G_pore_gpe1_queue, // queue + gpe_scom_centaur, // entry point + (uint32_t) &G_cent_scom_gpe_parms, // parm for the task + SSX_WAIT_FOREVER, // gm023 + NULL, // callback + NULL, // callback argument + 0); // options + if(rc) + { + TRAC_ERR("centaur_init: pore_flex_create failed for G_cent_scom_req. rc = 0x%08x", rc); + break; + } + + /// Initialization complete, Centaur Control & Data Collection + /// Tasks can now run + + } while(0); + + if( rc ) + { + + /* @ + * @errortype + * @moduleid CENTAUR_INIT_MOD + * @reasoncode SSX_GENERIC_FAILURE + * @userdata1 rc - Return code of failing function + * @userdata2 Return code of failing GPE + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc Failed to initialize Centaurs + */ + l_err = createErrl( + CENTAUR_INIT_MOD, //modId + SSX_GENERIC_FAILURE, //reasoncode + OCC_NO_EXTENDED_RC, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + rc, //userdata1 + L_centaur_reg_parms.rc //userdata2 + ); + + addUsrDtlsToErrl(l_err, //io_err + (uint8_t *) &G_centaur_reg_pore_req.ffdc, //i_dataPtr, + sizeof(PoreFfdc), //i_size + ERRL_USR_DTL_STRUCT_VERSION_1, //version + ERRL_USR_DTL_BINARY_DATA); //type + + REQUEST_RESET(l_err); + } + else + { + // Only initalize the control structures if we haven't had + // any errors yet + centaur_control_init(); + } + + return; +} + + +// Function Specification +// +// Name: cent_get_centaur_data_ptr +// +// Description: Returns a pointer to the most up-to-date centaur data for +// the centaur associated with the specified OCC centaur id. +// Returns NULL for centaur ID outside the range of 0 to 7. +// +// Flow: FN=None +// +// End Function Specification +MemData * cent_get_centaur_data_ptr( const uint8_t i_occ_centaur_id ) +{ + //The caller needs to send in a valid OCC centaur id. Since type is uchar + //so there is no need to check for case less than 0. + //If centaur id is invalid then returns NULL. + if( i_occ_centaur_id < MAX_NUM_CENTAURS ) + { + //Returns a pointer to the most up-to-date centaur data. + return G_centaur_data_ptrs[i_occ_centaur_id]; + } + else + { + //Core id outside the range + TRAC_ERR("cent_get_centaur_data_ptr: Invalid OCC centaur id [0x%x]", i_occ_centaur_id); + return( NULL ); + } +} + diff --git a/src/occ/cent/centaur_data.h b/src/occ/cent/centaur_data.h new file mode 100755 index 0000000..8715940 --- /dev/null +++ b/src/occ/cent/centaur_data.h @@ -0,0 +1,180 @@ +/****************************************************************************** +// @file centaur_data.h +// @brief Centaur external functions & data. +*/ +/****************************************************************************** + * + * @page ChangeLogs Change Logs + * @section _centaur_data_h centaur_data.h + * @verbatim + * + * Flag Def/Fea Userid Date Description + * ------- ---------- -------- ---------- ---------------------------------- + * @th00c thallet 03/14/2012 Created + * @th013 thallet 07/17/2012 SW150201: OCC VPO: inbound scom + * @th016 thallet 08/13/2012 Only 4 Centaurs possible on Murano + * @th018 852950 thallet 09/12/2012 Added Centaur thermal readings + * @th031 878471 thallet 04/15/2013 Centaur Throttles + * @th032 thallet 04/26/2013 Centaur Bringup Changes + * @th045 893135 thallet 07/26/2013 Updated for new Centaur Procedures + * @gm004 892961 milesg 07/25/2013 Removed centaur_init from init section + * @gm012 905097 milesg 10/31/2013 Fix Centaur enablement + * @gm013 907548 milesg 11/22/2013 Memory therm monitoring support + * @gm015 907601 milesg 12/06/2013 L4 Bank Delete circumvention and centaur i2c recovery + * @gm016 909061 milesg 12/10/2013 Support memory throttling due to temperature + * + * @endverbatim + * + *///*************************************************************************/ + +#ifndef _CENTAUR_DATA_H +#define _CENTAUR_DATA_H + +//************************************************************************* +// Includes +//************************************************************************* +#include <occ_common.h> +#include <ssx.h> +#include "rtls.h" +#include "gpe_data.h" +#include "occ_sys_config.h" + +//************************************************************************* +// Externs +//************************************************************************* + +//************************************************************************* +// Defines/Enums +//************************************************************************* + +// Mask that is used by procedure to specify which centaurs are present +#define ALL_CENTAURS_MASK 0x000000ff // @th016 +#define ALL_CENTAURS_MASK_GPE 0x000000ff00000000ull // @th016 +// Centaur0, used by OCC +#define CENTAUR0_PRESENT_MASK 0x00000080ul +#define CENTAUR0_PRESENT_MASK_GPE 0x0000008000000000ull + +// Used for specifing buffer allocations +#define NUM_CENTAUR_DATA_BUFF MAX_NUM_CENTAURS +#define NUM_CENTAUR_DOUBLE_BUF 1 +#define NUM_CENTAUR_DATA_EMPTY_BUF 1 + +// Specify a command BAR addresses for all centaur operations +#define CENTAUR_MEM_DATA_BAR MEM_DATA_BAR_SELECT_1 // @th032 + +// Enum for specifying each Centaur +enum eOccCentaurs +{ + CENTAUR_0 = 0, + CENTAUR_1 = 1, + CENTAUR_2 = 2, + CENTAUR_3 = 3, + CENTAUR_4 = 4, + CENTAUR_5 = 5, + CENTAUR_6 = 6, + CENTAUR_7 = 7, +}; + +//************************************************************************* +// Macros +//************************************************************************* +//Returns 0 if the specified centaur is not present. Otherwise, returns none-zero. +#define CENTAUR_PRESENT(occ_cent_id) \ + ((CENTAUR0_PRESENT_MASK >> occ_cent_id) & G_present_centaurs) + +//Returns 0 if the specified centaur is not updated. Otherwise, returns none-zero. +#define CENTAUR_UPDATED(occ_cent_id) \ + ((CENTAUR0_PRESENT_MASK >> occ_cent_id) & G_updated_centaur_mask) + +//Returns 0 if the specified centaur is not updated. Otherwise, returns none-zero. +#define CLEAR_CENTAUR_UPDATED(occ_cent_id) \ + G_updated_centaur_mask &= ~(CENTAUR0_PRESENT_MASK >> occ_cent_id) + +//Returns the bitmask for the passed in Centaur (uint32_t) +#define CENTAUR_BY_MASK(occ_cent_id) \ + ((CENTAUR0_PRESENT_MASK >> occ_cent_id) & ALL_CENTAURS_MASK) + +//Returns the bitmask for the passed in Centaur (uint64_t) +#define CENTAUR_BY_MASK_GPE(occ_cent_id) \ + (((uint64_t) CENTAUR0_PRESENT_MASK_GPE >> occ_cent_id) & ALL_CENTAURS_MASK_GPE) + +#define DIMM_SENSOR0 0x80 + +#define CENTAUR_SENSOR_ENABLED(occ_cent_id, sensor_num) \ + (G_cent_enabled_sensors.bytes[occ_cent_id] & (DIMM_SENSOR0 >> (sensor_num))) + +#define MBA_CONFIGURED(occ_cent_id, mba_num) \ + (G_configured_mbas & (1 << ((occ_cent_id * 2) + mba_num))) +//************************************************************************* +// Structures +//************************************************************************* + +//Centaur data collect structures used for task data pointers +struct centaur_data_task { + uint8_t start_centaur; + uint8_t current_centaur; + uint8_t end_centaur; + uint8_t prev_centaur; // @th013 + MemData * centaur_data_ptr; + PoreFlex gpe_req; +} __attribute__ ((__packed__)); +typedef struct centaur_data_task centaur_data_task_t; + +typedef union +{ + uint64_t bigword; + uint32_t words[2]; + uint8_t bytes[8]; +}cent_sensor_flags_t; + + +//************************************************************************* +// Globals +//************************************************************************* + +//Global centaur structures used for task data pointers +extern centaur_data_task_t G_centaur_data_task; + +//Global is bitmask of centaurs +extern uint32_t G_present_centaurs; + +//AMEC needs to know when data for a centaur has been collected. +extern uint32_t G_updated_centaur_mask; + +//global bitmap of enabled dimm sensors +extern cent_sensor_flags_t G_cent_enabled_sensors; + +//global bitmap of dimms that have ever gone over the error temperature +extern cent_sensor_flags_t G_dimm_overtemp_bitmap; + +//global bitmap of dimms temps that have been updated +extern cent_sensor_flags_t G_dimm_temp_updated_bitmap; + +//global bitmap of centaurs that have ever gone over the error temperature +extern uint8_t G_cent_overtemp_bitmap; + +//global bitmap of centaur temperatures that have been updated +extern uint8_t G_cent_temp_updated_bitmap; + +//bitmap of configured MBA's (2 per centaur, lsb is centaur0/mba0) +extern uint16_t G_configured_mbas; +//************************************************************************* +// Function Prototypes +//************************************************************************* + +//Collect centaur data for all centaur in specified range +void task_centaur_data( task_t * i_task ); + +//Initialize structures for collecting centaur data. +//void centaur_init( void ) INIT_SECTION; +void centaur_init( void ); //gm004 + +//handles centaur i2c recovery and other workarounds +void cent_recovery(uint32_t i_cent); + +//Returns a pointer to the most up-to-date centaur data for the centaur +//associated with the specified OCC centaur id. +MemData * cent_get_centaur_data_ptr( const uint8_t i_centaur_id ); + +#endif //_CENTAUR_DATA_H + diff --git a/src/occ/cent/centaur_data_service_codes.h b/src/occ/cent/centaur_data_service_codes.h new file mode 100755 index 0000000..2634efb --- /dev/null +++ b/src/occ/cent/centaur_data_service_codes.h @@ -0,0 +1,64 @@ +/****************************************************************************** +// @file centaur_data_service_codes.h +// @brief Error codes for cent component. +*/ +/****************************************************************************** + * + * @page ChangeLogs Change Logs + * @section _centaur_data_service_codes_h centaur_data_service_codes.h + * @verbatim + * + * Flag Def/Fea Userid Date Description + * ------- ---------- -------- ---------- ---------------------------------- + * @th00c thallet 03/14/2012 Created + * @th031 878471 thallet 04/15/2013 Centaur Throttles + * @gm015 907601 milesg 12/06/2013 L4 Bank Delete circumvention and centaur i2c recovery + * + * @endverbatim + * + *///*************************************************************************/ + +#ifndef _CENTAUR_DATA_SERVICE_CODES_H_ +#define _CENTAUR_DATA_SERVICE_CODES_H_ + +//************************************************************************* +// Includes +//************************************************************************* +#include <comp_ids.h> + +//************************************************************************* +// Externs +//************************************************************************* + +//************************************************************************* +// Macros +//************************************************************************* + +//************************************************************************* +// Defines/Enums +//************************************************************************* +enum centModuleId +{ + CENT_TASK_DATA_MOD = CENT_COMP_ID | 0x00, + CENTAUR_INIT_MOD = CENT_COMP_ID | 0x01, + CENT_TASK_CONTROL_MOD = CENT_COMP_ID | 0x02, + CENT_RECOVERY_MOD = CENT_COMP_ID | 0x03, +}; + +//************************************************************************* +// Structures +//************************************************************************* + +//************************************************************************* +// Globals +//************************************************************************* + +//************************************************************************* +// Function Prototypes +//************************************************************************* + +//************************************************************************* +// Functions +//************************************************************************* + +#endif /* #ifndef _CENTAUR_DATA_SERVICE_CODES_H_ */ |