summaryrefslogtreecommitdiffstats
path: root/src/occ_405/mem
diff options
context:
space:
mode:
authorWael El-Essawy <welessa@us.ibm.com>2016-08-29 19:33:20 -0500
committerWael El-Essawy <welessa@us.ibm.com>2016-09-16 11:19:15 -0400
commit8a7d7b2d13098453380fbdf69c0136515ba33b06 (patch)
tree9ce20ff68625d0a6d4b5d118514a48949232e173 /src/occ_405/mem
parent444caf2b8e674263f27963b3c9739e48aa793d00 (diff)
downloadtalos-occ-8a7d7b2d13098453380fbdf69c0136515ba33b06.tar.gz
talos-occ-8a7d7b2d13098453380fbdf69c0136515ba33b06.zip
Memory Temperature Control Loop (memory throttling)
* Memory throttling due to over temp * Throttle when reach timeout getting new temperature readings * Log error for temperature exceeding ERROR threshold Change-Id: I089c88aadba84e7296ad87b8cb87fa8c045ff912 RTC: 131188 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/28933 Reviewed-by: Wael El-Essawy <welessa@us.ibm.com> Tested-by: Wael El-Essawy <welessa@us.ibm.com>
Diffstat (limited to 'src/occ_405/mem')
-rw-r--r--src/occ_405/mem/memory.c411
-rw-r--r--src/occ_405/mem/memory.h76
-rw-r--r--src/occ_405/mem/memory_service_codes.h41
3 files changed, 528 insertions, 0 deletions
diff --git a/src/occ_405/mem/memory.c b/src/occ_405/mem/memory.c
new file mode 100644
index 0000000..fe338ed
--- /dev/null
+++ b/src/occ_405/mem/memory.c
@@ -0,0 +1,411 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/occ_405/mem/memory.c $ */
+/* */
+/* OpenPOWER OnChipController Project */
+/* */
+/* Contributors Listed Below - COPYRIGHT 2014,2016 */
+/* [+] International Business Machines Corp. */
+/* */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#include <trac_interface.h>
+#include <trac.h>
+
+#include "memory.h"
+#include "dimm_control.h"
+#include "centaur_control.h"
+#include "centaur_data.h"
+#include "memory_service_codes.h"
+#include <occ_service_codes.h> // for SSX_GENERIC_FAILURE
+
+// Defined elsewhere; memory_init() does nothing unless this is true.
+extern bool G_mem_monitoring_allowed;
+// Defined elsewhere; its error.rc field is read by task_memory_control() as the
+// GPE return code on Nimbus systems.
+extern dimm_control_args_t G_dimm_control_args;
+// Defined elsewhere; memory_init() writes the flags of the memory tasks in this table.
+extern task_t G_task_table[TASK_END];
+
+
+// @TODO: TEMP - uncomment when Centaur code is enabled
+//extern GpeScomParms G_centaur_control_reg_parms;
+
+// Throttle limits for the dimms, indexed [memory controller][MCU port]. The
+// same table serves both Centaur (Cumulus) and rdimm (Nimbus) based systems:
+//
+//  - Nimbus:  only the first two rows (memory controller pairs M01 and M23)
+//             are used.
+//  - Cumulus: only the first two columns (the two mba pairs mba01 and mba23)
+//             are used.
+//
+// Every entry starts out zeroed; elements without an explicit initializer are
+// zero-initialized just like the one spelled out below.
+memory_throttle_t G_memoryThrottleLimits[MAX_NUM_MEM_CONTROLLERS][MAX_NUM_MCU_PORTS] = {{{0}}};
+
+// Memory control task data, used for task data pointers in both Cumulus
+// (Centaur) and Nimbus (RDIMM) systems. task_memory_control() walks the index
+// range [startMemIndex, endMemIndex], advancing one index per completed pass
+// and wrapping from endMemIndex back to startMemIndex.
+// The gpe_req member is not listed below and is therefore zero-initialized.
+memory_control_task_t G_memory_control_task =
+{
+ .startMemIndex = 0, // First Memory Control Index (Centaur/MC_pair|port)
+ .prevMemIndex = 7, // Previous Memory Control Index written to
+ .curMemIndex = 0, // Current Memory Control Index
+ .endMemIndex = 7, // Last Memory Control Index
+ .traceThresholdFlags = 0, // Trace Throttle Flags (eMemoryControlTraceFlags bits)
+};
+
+
+// Function Specification
+//
+// Name: task_memory_control
+//
+// Description: Performs system memory control:
+//              - for Nimbus: calls dimm_control() to control dimms power
+//              - for Cumulus: calls centaur_control() to control centaur power
+//                (call is commented out until the Centaur code is enabled)
+//
+//              Each invocation handles at most one memory index:
+//               1. If the previous GPE request is still attached to an async
+//                  queue, count a timeout tick for the current index and stop.
+//               2. If a request was scheduled earlier, check that it completed
+//                  without a GPE return code; count a failure otherwise.
+//               3. Advance curMemIndex (wrapping endMemIndex -> startMemIndex).
+//               4. Run the control routine for the new index and schedule the
+//                  GPE request when that routine returns non-zero.
+//              When the failure count of the index reaches
+//              MEMORY_CONTROL_SCOM_TIMEOUT an error is logged and a reset is
+//              requested.
+//
+// Parameters:  i_task - task entry; i_task->data_ptr is used as a
+//                       memory_control_task_t
+//
+// End Function Specification
+
+// wait up to 16 ticks before logging timeout failure
+#define MEMORY_CONTROL_SCOM_TIMEOUT 16
+
+void task_memory_control( task_t * i_task )
+{
+    //track # of consecutive failures on a specific Centaur/RDIMM
+    static uint8_t L_scom_timeout[MAX_NUM_MEM_CONTROLLERS] = {0};
+
+    errlHndl_t l_err = NULL; // Error handler
+    int rc = 0; // Return code
+    uint8_t memIndex;
+    static bool L_gpe_scheduled = FALSE;   // a GPE request was scheduled by a previous call
+    static uint8_t L_gpe_fail_logged = 0;  // per-centaur bit mask (only updated by disabled Centaur code)
+    static bool L_gpe_idle_traced = FALSE; // "still running" trace already emitted
+    static bool L_gpe_had_1_tick = FALSE;  // request was already seen busy on the previous call
+
+    uint32_t gpe_rc = 0;
+
+    // Pointer to the task data structure
+    memory_control_task_t* memControlTask = (memory_control_task_t*) i_task->data_ptr;
+
+
+    // Pick up the return code left behind by the last GPE request
+    if (MEM_TYPE_NIMBUS == G_sysConfigData.mem_type)
+    {
+        gpe_rc = G_dimm_control_args.error.rc;
+    }
+    else if (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)
+    {
+// @TODO: TEMP - uncomment when Centaur code is enabled
+//        gpe_rc = G_centaur_control_reg_parms.rc;
+    }
+
+    do
+    {
+        memIndex = memControlTask->curMemIndex;
+
+        //First, check to see if the previous GPE request still running
+        //A request is considered idle if it is not attached to any of the
+        //asynchronous request queues
+        if( !(async_request_is_idle(&memControlTask->gpe_req.request)) )
+        {
+            L_scom_timeout[memIndex]++;
+            //This can happen due to variability in when the task runs,
+            //so only trace once the request has been busy for more than one call
+            if(!L_gpe_idle_traced && L_gpe_had_1_tick)
+            {
+                TRAC_INFO("task_memory_control: GPE is still running. memIndex[%d]", memIndex);
+                memControlTask->traceThresholdFlags |= MEMORY_CONTROL_GPE_STILL_RUNNING;
+                L_gpe_idle_traced = TRUE;
+            }
+            L_gpe_had_1_tick = TRUE;
+            break;
+        }
+        else
+        {
+            //Request is idle
+            L_gpe_had_1_tick = FALSE;
+            if(L_gpe_idle_traced)
+            {
+                TRAC_INFO("task_memory_control: GPE completed. memIndex[%d]", memIndex);
+                L_gpe_idle_traced = FALSE;
+            }
+        }
+
+        //check scom status
+        if(L_gpe_scheduled)
+        {
+            if(!async_request_completed(&memControlTask->gpe_req.request) || gpe_rc)
+            {
+                if (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)
+                {
+                    if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> memIndex)))
+                    {
+// @TODO: TEMP - uncomment when Centaur code is enabled
+/*                        if (!check_centaur_checkstop(memIndex))
+                        {
+                            L_gpe_fail_logged |= CENTAUR0_PRESENT_MASK >> memIndex;
+                        }
+*/
+                    }
+                }
+                //Request failed. Keep count of failures and request a reset if we reach a
+                //max retry count
+                L_scom_timeout[memIndex]++;
+                if(L_scom_timeout[memIndex] == MEMORY_CONTROL_SCOM_TIMEOUT)
+                {
+                    break;
+                }
+
+            }//if(!async_request_completed(&memControlTask->gpe_req.request) || gpe_rc)
+            else
+            {
+                //request completed successfully. reset the timeout.
+                L_scom_timeout[memIndex] = 0;
+            }
+        }//if(L_gpe_scheduled)
+
+        //The previous GPE job completed. Now get ready for the next job.
+        L_gpe_scheduled = FALSE;
+
+
+        //Update current dimm/centaur index if we didn't fail
+        memControlTask->prevMemIndex = memIndex;
+        if ( memIndex >= memControlTask->endMemIndex )
+        {
+            memIndex = memControlTask->startMemIndex;
+        }
+        else
+        {
+            memIndex++;
+        }
+        memControlTask->curMemIndex = memIndex;
+
+
+        if (MEM_TYPE_NIMBUS == G_sysConfigData.mem_type)
+        {
+            if(!NIMBUS_DIMM_INDEX_THROTTLING_CONFIGURED(memIndex))
+            {
+                break;
+            }
+
+            // control dimm specified by mc,port
+            // (index bits above the low two select mc, low two bits select port)
+            uint8_t mc = memIndex>>2;
+            uint8_t port = memIndex&3;
+
+            // Do the update_nlimit, calculate new N values, check whether throttle values
+            // were updated, then Schedule GPE request, rc if problem, else L_gpe_schedule
+            rc = dimm_control(mc, port);
+
+            // Format fixed: "%%04X" printed a literal "%04X" and shifted the
+            // remaining arguments, so rc showed the mc|port value.
+            MEM_DBG("memIndex=%d, mc|port=0x%04X, rc=%d",
+                    memIndex, mc<<8| port, rc);
+
+        }
+        else if (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)
+        {
+            //If centaur is not present or neither MBA is configured then skip it.
+            if(!CENTAUR_PRESENT(memIndex) ||
+               (!MBA_CONFIGURED(memIndex, 0) && !MBA_CONFIGURED(memIndex, 1)))
+            {
+                break;
+            }
+// @TODO TEMP: centaur code not ready yet
+//            rc = centaur_control(memIndex); // Control one centaur
+        }
+
+        // A non-zero rc from the control routine means a GPE request must be scheduled
+        if(rc)
+        {
+            // NOTE(review): this schedules through the global rather than through
+            // memControlTask; the two match only if i_task->data_ptr points at
+            // G_memory_control_task - confirm.
+            rc = gpe_request_schedule(&G_memory_control_task.gpe_req);
+
+            if( rc )
+            {
+                if (MEM_TYPE_NIMBUS == G_sysConfigData.mem_type)
+                {
+                    gpe_rc = G_dimm_control_args.error.rc;
+                }
+                else if (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)
+                {
+// @TODO: TEMP - uncomment when Centaur code is enabled
+//                    gpe_rc = G_centaur_control_reg_parms.rc;
+                }
+
+                //Error in schedule gpe memory (dimm/centaur) control
+                TRAC_ERR("task_memory_control: Failed to schedule memory control gpe rc=%x",
+                         rc);
+
+                /* @
+                 * @errortype
+                 * @moduleid    MEM_MID_TASK_MEMORY_CONTROL
+                 * @reasoncode  SSX_GENERIC_FAILURE
+                 * @userdata1   rc - Return code of failing function
+                 * @userdata2   gpe_rc - Return code read from the GPE control args
+                 * @userdata4   ERC_MEM_CONTROL_SCHEDULE_FAILURE
+                 * @devdesc     OCC Failed to schedule a GPE job for memory control
+                 */
+                l_err = createErrl(
+                        MEM_MID_TASK_MEMORY_CONTROL,            // modId
+                        SSX_GENERIC_FAILURE,                    // reasoncode
+                        ERC_MEM_CONTROL_SCHEDULE_FAILURE,       // Extended reason code
+                        ERRL_SEV_UNRECOVERABLE,                 // Severity
+                        NULL,                                   // Trace Buf
+                        DEFAULT_TRACE_SIZE,                     // Trace Size
+                        rc,                                     // userdata1
+                        gpe_rc                                  // userdata2
+                        );
+
+                addUsrDtlsToErrl(
+                        l_err,                                          //io_err
+                        (uint8_t *) &(memControlTask->gpe_req.ffdc),    //i_dataPtr,
+                        sizeof(memControlTask->gpe_req.ffdc),           //i_size
+                        ERRL_USR_DTL_STRUCT_VERSION_1,                  //version
+                        ERRL_USR_DTL_BINARY_DATA);                      //type
+
+                REQUEST_RESET(l_err);   //This will add a firmware callout for us
+                break;
+            }
+
+            L_gpe_scheduled = TRUE;
+        }
+
+    } while(0);
+
+
+    // Log an error and request a reset once the failure count of the index the
+    // loop ended on has reached the timeout threshold.
+    if(L_scom_timeout[memIndex] == MEMORY_CONTROL_SCOM_TIMEOUT)
+    {
+        if (MEM_TYPE_NIMBUS == G_sysConfigData.mem_type)
+        {
+            if(memIndex>>2)
+            {
+                TRAC_ERR("task_memory_control: Timeout scomming MC23:port[%d]", memIndex&3);
+            }
+            else
+            {
+                TRAC_ERR("task_memory_control: Timeout scomming MC01:port[%d]", memIndex&3);
+            }
+        }
+        else if (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)
+        {
+            TRAC_ERR("task_memory_control: Timeout scomming centaur[%d]", memIndex);
+        }
+        /* @
+         * @errortype
+         * @moduleid    MEM_MID_TASK_MEMORY_CONTROL
+         * @reasoncode  INTERNAL_FAILURE
+         * @userdata1   centaur/memIndex number
+         * @userdata2   0
+         * @userdata4   OCC_NO_EXTENDED_RC
+         * @devdesc     Timed out trying to set the memory throttle settings.
+         */
+        l_err = createErrl(
+                MEM_MID_TASK_MEMORY_CONTROL,            // modId
+                INTERNAL_FAILURE,                       // reasoncode
+                OCC_NO_EXTENDED_RC,                     // Extended reason code
+                ERRL_SEV_PREDICTIVE,                    // Severity
+                NULL,                                   // Trace Buf
+                DEFAULT_TRACE_SIZE,                     // Trace Size
+                memIndex,                               // userdata1
+                0                                       // userdata2
+                );
+
+        addUsrDtlsToErrl(l_err,                                         //io_err
+                (uint8_t *) &(memControlTask->gpe_req.ffdc),            //i_dataPtr,
+                sizeof(memControlTask->gpe_req.ffdc),                   //i_size
+                ERRL_USR_DTL_STRUCT_VERSION_1,                          //version
+                ERRL_USR_DTL_BINARY_DATA);                              //type
+
+        //callout the centaur
+        // NOTE(review): centaur_huids[] is used for this callout regardless of
+        // mem_type, i.e. on Nimbus too - confirm that is intended.
+        addCalloutToErrl(l_err,
+                         ERRL_CALLOUT_TYPE_HUID,
+                         G_sysConfigData.centaur_huids[memIndex],
+                         ERRL_CALLOUT_PRIORITY_MED);
+
+        //callout the processor
+        addCalloutToErrl(l_err,
+                         ERRL_CALLOUT_TYPE_HUID,
+                         G_sysConfigData.proc_huid,
+                         ERRL_CALLOUT_PRIORITY_MED);
+
+        REQUEST_RESET(l_err);
+    }
+
+    return;
+}
+
+
+// Initialize the memory task data.
+//
+// Runs only when memory monitoring is allowed and the memory data task is not
+// yet runnable. For Nimbus memory it calls memory_nimbus_init(); any other
+// memory type is logged as an error and a reset is requested. Unless a safe
+// state has been requested afterwards, the RTL flags of the memory data and
+// memory control tasks are set, which also keeps this initialization from
+// being repeated.
+void memory_init()
+{
+    // Memory data task to check/enable (default task is for NIMBUS)
+    const task_id_t l_dataTaskId = TASK_ID_DIMM_SM;
+
+    // Nothing to do if monitoring is not allowed or the task already runs
+    if(!G_mem_monitoring_allowed || rtl_task_is_runnable(l_dataTaskId))
+    {
+        return;
+    }
+
+    if (MEM_TYPE_NIMBUS != G_sysConfigData.mem_type)
+    {
+        // TODO CUMULUS NOT SUPPORTED YET IN PHASE1
+#if 0
+        TRAC_INFO("memory_init: calling centaur_init()");
+        centaur_init(); //no rc, handles errors internally
+#endif
+        TRAC_ERR("memory_init: invalid memory type 0x%02X", G_sysConfigData.mem_type);
+        // NOTE(review): this tag block opens with "/*" while the tag blocks in
+        // task_memory_control open with "/* @" - confirm which one is intended.
+        /*
+         * @errortype
+         * @moduleid    MEM_MID_MEMORY_INIT
+         * @reasoncode  MEMORY_INIT_FAILED
+         * @userdata1   memory type
+         * @userdata2   0
+         * @devdesc     Invalid memory type detected
+         */
+        errlHndl_t l_initErr = createErrl(MEM_MID_MEMORY_INIT,
+                                          MEMORY_INIT_FAILED,
+                                          OCC_NO_EXTENDED_RC,
+                                          ERRL_SEV_PREDICTIVE,
+                                          NULL,
+                                          DEFAULT_TRACE_SIZE,
+                                          G_sysConfigData.mem_type,
+                                          0);
+        REQUEST_RESET(l_initErr);
+    }
+    else
+    {
+        // Init DIMM state manager IPC request
+        memory_nimbus_init();
+    }
+
+    // The init above may have resulted in a reset request
+    if(!isSafeStateRequested())
+    {
+        // Initialization was successful. Set task flags to allow memory
+        // tasks to run and also prevent from doing initialization again.
+        G_task_table[l_dataTaskId].flags = MEMORY_DATA_RTL_FLAGS;
+        G_task_table[TASK_ID_MEMORY_CONTROL].flags = MEMORY_CONTROL_RTL_FLAGS;
+    }
+    else
+    {
+        TRAC_ERR("memory_init: OCC is being reset, memory init failed (type=0x%02X)",
+                 G_sysConfigData.mem_type);
+    }
+
+} // end memory_init()
+
diff --git a/src/occ_405/mem/memory.h b/src/occ_405/mem/memory.h
new file mode 100644
index 0000000..1314823
--- /dev/null
+++ b/src/occ_405/mem/memory.h
@@ -0,0 +1,76 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/occ_405/mem/memory.h $ */
+/* */
+/* OpenPOWER OnChipController Project */
+/* */
+/* Contributors Listed Below - COPYRIGHT 2014,2016 */
+/* [+] International Business Machines Corp. */
+/* */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#ifndef _MEMORY_H
+#define _MEMORY_H
+
+#include "occ_sys_config.h"
+
+// Bit flags OR'ed into memory_control_task.traceThresholdFlags.
+// One bit per flag; bits 1-7 are reserved placeholders.
+typedef enum
+{
+    MEMORY_CONTROL_GPE_STILL_RUNNING    = (1 << 0),  // GPE request was still running when the task ran
+    MEMORY_CONTROL_RESERVED_1           = (1 << 1),
+    MEMORY_CONTROL_RESERVED_2           = (1 << 2),
+    MEMORY_CONTROL_RESERVED_3           = (1 << 3),
+    MEMORY_CONTROL_RESERVED_4           = (1 << 4),
+    MEMORY_CONTROL_RESERVED_5           = (1 << 5),
+    MEMORY_CONTROL_RESERVED_6           = (1 << 6),
+    MEMORY_CONTROL_RESERVED_7           = (1 << 7),
+} eMemoryControlTraceFlags;
+
+// Per slot/mba throttle values used for rdimm/centaur-dimm control.
+// G_memoryThrottleLimits holds one of these per [memory controller][MCU port].
+typedef struct
+{
+ uint16_t max_n_per_mba; // max N per mba: mode and OVS dependent, from config data
+ uint16_t max_n_per_chip; // max N per chip: mode and OVS dependent, from config data
+ uint16_t min_n_per_mba; // min N per mba: from config data
+} memory_throttle_t;
+
+// 64 bits encoding different bit fields corresponding to dimms
+// either on centaurs or on I2C ports, up to 8 dimms on 8 centaurs/ports.
+// The same 8 bytes can be accessed as one 64-bit value, as two 32-bit
+// words, or as eight individual bytes.
+typedef union
+{
+ uint64_t bigword; // all 64 flag bits at once
+ uint32_t words[2]; // the same storage viewed as two 32-bit words
+ uint8_t bytes[8]; // the same storage viewed as eight bytes
+}dimm_sensor_flags_t;
+
+
+// Task data for the memory control task (reached through task_t::data_ptr).
+// The index fields describe the range of memory control indexes that
+// task_memory_control() cycles through.
+// NOTE(review): the struct is packed, so gpe_req sits right after five
+// uint8_t fields (byte offset 5) - confirm unaligned access to its members is
+// acceptable on the target.
+typedef struct memory_control_task
+{
+    uint8_t     startMemIndex;          // index to wrap back to after endMemIndex
+    uint8_t     prevMemIndex;           // index that was current before the last advance
+    uint8_t     curMemIndex;            // index currently being controlled
+    uint8_t     endMemIndex;            // last index before wrapping to startMemIndex
+    uint8_t     traceThresholdFlags;    // eMemoryControlTraceFlags bits
+    GpeRequest  gpe_req;                // GPE request used for the memory control job
+} __attribute__ ((__packed__)) memory_control_task_t;
+
+// Throttle limits table indexed [memory controller][MCU port]; defined in memory.c.
+extern memory_throttle_t G_memoryThrottleLimits[MAX_NUM_MEM_CONTROLLERS][MAX_NUM_MCU_PORTS];
+
+// Memory control task entry point; i_task->data_ptr is used as a memory_control_task_t.
+void task_memory_control( task_t * i_task );
+
+#endif // _MEMORY_H
diff --git a/src/occ_405/mem/memory_service_codes.h b/src/occ_405/mem/memory_service_codes.h
new file mode 100644
index 0000000..1934b12
--- /dev/null
+++ b/src/occ_405/mem/memory_service_codes.h
@@ -0,0 +1,41 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/occ_405/mem/memory_service_codes.h $ */
+/* */
+/* OpenPOWER OnChipController Project */
+/* */
+/* Contributors Listed Below - COPYRIGHT 2015 */
+/* [+] International Business Machines Corp. */
+/* */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#ifndef _MEMORY_SERVICE_CODES_H
+#define _MEMORY_SERVICE_CODES_H
+
+//*************************************************************************
+// Includes
+//*************************************************************************
+#include <comp_ids.h>
+
+
+// Module IDs of the memory component, passed as the modId argument of
+// createErrl(). Each value is MEM_COMP_ID OR'ed with a per-function number.
+enum memModuleId
+{
+ MEM_MID_TASK_MEMORY_CONTROL = MEM_COMP_ID | 0x00, // errors logged by task_memory_control()
+ MEM_MID_MEMORY_INIT = MEM_COMP_ID | 0x01, // errors logged by memory_init()
+};
+
+#endif // _MEMORY_SERVICE_CODES_H
OpenPOWER on IntegriCloud