summaryrefslogtreecommitdiffstats
path: root/include/uapi
diff options
context:
space:
mode:
Diffstat (limited to 'include/uapi')
-rw-r--r--include/uapi/drm/amdgpu_drm.h35
-rw-r--r--include/uapi/linux/kfd_ioctl.h12
2 files changed, 46 insertions, 1 deletions
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4a53f6cfa034..e3a97da4add9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -210,6 +210,9 @@ union drm_amdgpu_bo_list {
#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
/* indicate some job from this context once cause gpu hang */
#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2)
+/* indicate some errors are detected by RAS */
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3)
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4)
/* Context priority level */
#define AMDGPU_CTX_PRIORITY_UNSET -2048
@@ -680,6 +683,7 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
/* Subquery id: Query DMCU firmware version */
#define AMDGPU_INFO_FW_DMCU 0x12
+ #define AMDGPU_INFO_FW_TA 0x13
/* number of bytes moved for TTM migration */
#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f
/* the used VRAM size */
@@ -733,6 +737,37 @@ struct drm_amdgpu_cs_chunk_data {
/* Number of VRAM page faults on CPU access. */
#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E
#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F
+/* query ras mask of enabled features*/
+#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20
+
+/* RAS MASK: UMC (VRAM) */
+#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0)
+/* RAS MASK: SDMA */
+#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1)
+/* RAS MASK: GFX */
+#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2)
+/* RAS MASK: MMHUB */
+#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3)
+/* RAS MASK: ATHUB */
+#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4)
+/* RAS MASK: PCIE */
+#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5)
+/* RAS MASK: HDP */
+#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6)
+/* RAS MASK: XGMI */
+#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7)
+/* RAS MASK: DF */
+#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8)
+/* RAS MASK: SMN */
+#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9)
+/* RAS MASK: SEM */
+#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10)
+/* RAS MASK: MP0 */
+#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11)
+/* RAS MASK: MP1 */
+#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12)
+/* RAS MASK: FUSE */
+#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13)
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e622fd1fbd46..dc067ed0b72d 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -211,6 +211,11 @@ struct kfd_ioctl_dbg_wave_control_args {
#define KFD_HW_EXCEPTION_GPU_HANG 0
#define KFD_HW_EXCEPTION_ECC 1
+/* For kfd_hsa_memory_exception_data.ErrorType */
+#define KFD_MEM_ERR_NO_RAS 0
+#define KFD_MEM_ERR_SRAM_ECC 1
+#define KFD_MEM_ERR_POISON_CONSUMED 2
+#define KFD_MEM_ERR_GPU_HANG 3
struct kfd_ioctl_create_event_args {
__u64 event_page_offset; /* from KFD */
@@ -250,7 +255,12 @@ struct kfd_hsa_memory_exception_data {
struct kfd_memory_exception_failure failure;
__u64 va;
__u32 gpu_id;
- __u32 pad;
+ __u32 ErrorType; /* 0 = no RAS error,
+ * 1 = ECC_SRAM,
+ * 2 = Link_SYNFLOOD (poison),
+ * 3 = GPU hang (not attributable to a specific cause),
+ * other values reserved
+ */
};
/* hw exception data */
OpenPOWER on IntegriCloud