summary | refs | log | tree | commit | diff | stats
path: root/src/occ_405/amec/amec_sys.h
diff options
context:
space:
mode:
author: William Bryan <wilbryan@us.ibm.com>, 2017-09-28 13:32:29 -0500
committer: William A. Bryan <wilbryan@us.ibm.com>, 2017-10-03 16:03:05 -0400
commit: 74f721c90235a18821b97782d98349cf51e0f12d (patch)
tree: 1f2fd59b41db514c0273632dd2dd7926e25a2030 /src/occ_405/amec/amec_sys.h
parent: 76b91d0038d59b30de14108e908bc78c6d988796 (diff)
download: talos-occ-74f721c90235a18821b97782d98349cf51e0f12d.tar.gz
download: talos-occ-74f721c90235a18821b97782d98349cf51e0f12d.zip
GPU 405 Enable Memory Temperatures
Change-Id: Id50d12a50a05b8b3a6a6f1ce3ce4512d3299caa7
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46882
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Diffstat (limited to 'src/occ_405/amec/amec_sys.h')
-rwxr-xr-x  src/occ_405/amec/amec_sys.h  38
1 files changed, 23 insertions, 15 deletions
diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h
index 803ca28..3f1d333 100755
--- a/src/occ_405/amec/amec_sys.h
+++ b/src/occ_405/amec/amec_sys.h
@@ -436,27 +436,35 @@ typedef struct
//-------------------------------------------------------------
typedef struct {
- bool disabled; // GPU has been marked failed and no longer monitored
- bool readOnce; // Comm has been established with GPU
- bool overtempError; // Core OT error has been logged against GPU
- bool memOvertempError; // Memory OT error has been logged against GPU
+ bool disabled; // GPU has been marked failed and no longer monitored
+ bool readOnce; // Comm has been established with GPU
+ bool commErrorLogged; // GPU has been called out due to comm error
+ bool overtempError; // Core OT error has been logged against GPU
+ bool memOvertempError; // Memory OT error has been logged against GPU
bool checkDriverLoaded; // Indicates if need to check if driver is loaded
- bool driverLoaded; // Indicates if GPU driver is loaded
+ bool driverLoaded; // Indicates if GPU driver is loaded
bool checkMemTempSupport; // Indicates if need to check if mem monitoring is supported
- bool memTempSupported; // Indicates if memory temperature monitoring is supported
- uint8_t memErrorCount; // count of consecutive GPU mem temp read failures
- uint8_t errorCount; // count of consecutive GPU core temp read failures
+ bool memTempSupported; // Indicates if memory temperature monitoring is supported
+ bool notReset; // '1' = GPU NOT in reset. Read from OCC FLAGS register
+ bool coreTempNotAvailable; // for fan control: '1' = core temp not available. (send 0 for fan control)
+ bool memTempNotAvailable; // for fan control: '1' = Mem temp not available. (send 0 for fan control)
+ bool coreTempFailure; // for fan control: '1' = timeout failure reading core temp (send 0xFF for fan control)
+ bool memTempFailure; // for fan control: '1' = timeout failure reading Mem temp (send 0xFF for fan control)
+ uint8_t memErrorCount; // count of consecutive GPU mem temp read failures when GPU not in reset
+ uint8_t errorCount; // count of consecutive GPU core temp read failures when GPU not in reset
+ uint8_t retryCount; // count of consecutive GPU core temp read failures before I2C reset
} gpuStatus_t;
typedef struct {
- bool check_pwr_limit; // Indicates if need to read power limits from GPU
- bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max
- bool gpu_min_cap_required; // Indicates if power limits were read i.e. have min/max
- uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU
- uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU
- uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set
+ bool check_pwr_limit; // Indicates if need to read power limits from GPU
+ bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max
+ bool set_failed; // Indicates if failed to set power limit
+ bool gpu_min_cap_required; // Indicates if GPU requires min cap
+ uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU
+ uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU
+ uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set
uint32_t gpu_requested_pcap_mw; // Requested power cap in mW sent to GPU
- uint32_t gpu_actual_pcap_mw; // Actual power cap in mW read back from the GPU
+ uint32_t gpu_default_pcap_mw; // Default power cap in mW read from the GPU
} gpuPcap_t;
OpenPOWER on IntegriCloud