diff options
author | mbroyles <mbroyles@us.ibm.com> | 2017-08-06 19:08:00 -0500 |
---|---|---|
committer | William A. Bryan <wilbryan@us.ibm.com> | 2017-08-14 15:18:26 -0400 |
commit | 8a335d83ed938f05f95ca1cfdbbb5292053ed51f (patch) | |
tree | bd2b38c6df596f3d3bf9f70f8a54a8a205e4e2e1 /src/occ_405/amec/amec_sys.h | |
parent | 71b5f68da8b725f9c5251261b41fd824e652e491 (diff) | |
download | talos-occ-8a335d83ed938f05f95ca1cfdbbb5292053ed51f.tar.gz talos-occ-8a335d83ed938f05f95ca1cfdbbb5292053ed51f.zip |
Initial 405 GPU support
Change-Id: I6e957ca1aa643d257274e99957df5b15ac8c889b
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44254
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Diffstat (limited to 'src/occ_405/amec/amec_sys.h')
-rwxr-xr-x | src/occ_405/amec/amec_sys.h | 55 |
1 files changed, 50 insertions, 5 deletions
```diff
diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h
index 74b2812..a45fb42 100755
--- a/src/occ_405/amec/amec_sys.h
+++ b/src/occ_405/amec/amec_sys.h
@@ -430,6 +430,53 @@ typedef struct
 } amec_quad_t;
 
 //-------------------------------------------------------------
+// GPU Structures
+//-------------------------------------------------------------
+
+typedef struct {
+    bool disabled;              // GPU has been marked failed and no longer monitored
+    bool readOnce;              // Comm has been established with GPU
+    bool overtempError;         // Core OT error has been logged against GPU
+    bool memOvertempError;      // Memory OT error has been logged against GPU
+    bool checkDriverLoaded;     // Indicates if need to check if driver is loaded
+    bool driverLoaded;          // Indicates if GPU driver is loaded
+    bool checkMemTempSupport;   // Indicates if need to check if mem monitoring is supported
+    bool memTempSupported;      // Indicates if memory temperature monitoring is supported
+    uint8_t memErrorCount;      // count of consecutive GPU mem temp read failures
+    uint8_t errorCount;         // count of consecutive GPU core temp read failures
+} gpuStatus_t;
+
+typedef struct {
+    bool check_pwr_limit;           // Indicates if need to read power limits from GPU
+    bool pwr_limits_read;           // Indicates if power limits were read i.e. have min/max
+    uint32_t gpu_min_pcap_mw;       // Min GPU power limit in mW read from the GPU
+    uint32_t gpu_max_pcap_mw;       // Max GPU power limit in mW read from the GPU
+    uint32_t gpu_desired_pcap_mw;   // AMEC determined pcap in mW to set
+    uint32_t gpu_requested_pcap_mw; // Requested power cap in mW sent to GPU
+    uint32_t gpu_actual_pcap_mw;    // Actual power cap in mW read back from the GPU
+} gpuPcap_t;
+
+
+typedef struct
+{
+    //-----------------------------------
+    // Sensors
+    //-----------------------------------
+    sensor_t tempgpu;       // GPU core temperature
+    sensor_t tempgpumem;    // GPU HBM temperature
+
+    //-----------------------------------
+    // Data
+    //-----------------------------------
+    // General Status of GPU
+    gpuStatus_t status;
+
+    // GPU Power Cap Information
+    gpuPcap_t pcap;
+
+} amec_gpu_t;
+
+//-------------------------------------------------------------
 // Proc Structure
 //-------------------------------------------------------------
 typedef struct
@@ -468,11 +515,6 @@ typedef struct
     // Nimbus DIMM Sensors
     sensor_t tempdimm[NUM_DIMM_PORTS*NUM_DIMMS_PER_I2CPORT];
 
-    // GPU Sensors
-    sensor_t tempgpu0;
-    sensor_t tempgpu1;
-    sensor_t tempgpu2;
-
     sensor_t curvdn;
     sensor_t pwrvdd;
     sensor_t pwrvdn;
@@ -607,6 +649,9 @@ typedef struct
     // in the hopes of perhaps reusing some code from previous projects.
     amec_proc_t proc[NUM_PROC_CHIPS_PER_OCC];
 
+    // GPU Data
+    amec_gpu_t gpu[MAX_NUM_GPU_PER_DOMAIN];
+
     // OCC Firmware Data
     amec_fw_t fw;
 
```