diff options
Diffstat (limited to 'include/uapi')
| -rw-r--r-- | include/uapi/drm/amdgpu_drm.h | 35 | ||||
| -rw-r--r-- | include/uapi/linux/kfd_ioctl.h | 12 |
2 files changed, 46 insertions, 1 deletions
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4a53f6cfa034..e3a97da4add9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -210,6 +210,9 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) /* indicate some job from this context once cause gpu hang */ #define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) +/* indicate some errors are detected by RAS */ +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 @@ -680,6 +683,7 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 /* Subquery id: Query DMCU firmware version */ #define AMDGPU_INFO_FW_DMCU 0x12 + #define AMDGPU_INFO_FW_TA 0x13 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ @@ -733,6 +737,37 @@ struct drm_amdgpu_cs_chunk_data { /* Number of VRAM page faults on CPU access. */ #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E #define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 + +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e622fd1fbd46..dc067ed0b72d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -211,6 +211,11 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_HW_EXCEPTION_GPU_HANG 0 #define KFD_HW_EXCEPTION_ECC 1 +/* For kfd_hsa_memory_exception_data.ErrorType */ +#define KFD_MEM_ERR_NO_RAS 0 +#define KFD_MEM_ERR_SRAM_ECC 1 +#define KFD_MEM_ERR_POISON_CONSUMED 2 +#define KFD_MEM_ERR_GPU_HANG 3 struct kfd_ioctl_create_event_args { __u64 event_page_offset; /* from KFD */ @@ -250,7 +255,12 @@ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; __u32 gpu_id; - __u32 pad; + __u32 ErrorType; /* 0 = no RAS error, + * 1 = ECC_SRAM, + * 2 = Link_SYNFLOOD (poison), + * 3 = GPU hang (not attributable to a specific cause), + * other values reserved + */ }; /* hw exception data */ |

