From 9b54d2017687df9fa827faf9e4022973b87fc0ff Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 11 Jan 2019 14:38:51 -0500 Subject: drm/amdkfd: add RAS ECC event support (v3) RAS ECC event will combine with GPU reset event, due to ECC interrupts are caused by uncorrectable error that triggers GPU reset. v2: Fix misleading-indentation warning v3: fix build with CONFIG_HSA_AMD disabled Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e622fd1fbd46..dc067ed0b72d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -211,6 +211,11 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_HW_EXCEPTION_GPU_HANG 0 #define KFD_HW_EXCEPTION_ECC 1 +/* For kfd_hsa_memory_exception_data.ErrorType */ +#define KFD_MEM_ERR_NO_RAS 0 +#define KFD_MEM_ERR_SRAM_ECC 1 +#define KFD_MEM_ERR_POISON_CONSUMED 2 +#define KFD_MEM_ERR_GPU_HANG 3 struct kfd_ioctl_create_event_args { __u64 event_page_offset; /* from KFD */ @@ -250,7 +255,12 @@ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; __u32 gpu_id; - __u32 pad; + __u32 ErrorType; /* 0 = no RAS error, + * 1 = ECC_SRAM, + * 2 = Link_SYNFLOOD (poison), + * 3 = GPU hang (not attributable to a specific cause), + * other values reserved + */ }; /* hw exception data */ -- cgit v1.2.3