summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
author: Tony Tye <Tony.Tye@amd.com> 2018-03-23 18:58:47 +0000
committer: Tony Tye <Tony.Tye@amd.com> 2018-03-23 18:58:47 +0000
commit: 88441a3d1ef895de416cef4803a74a0faed63501 (patch)
tree: 62b581c5d08ccfc43d9a175a285718d4113103d3 /llvm/test/CodeGen/AMDGPU
parent: 68e11a6ecac7a37a5772d8c5c9c56c19614fc7f0 (diff)
download: bcm5719-llvm-88441a3d1ef895de416cef4803a74a0faed63501.tar.gz
download: bcm5719-llvm-88441a3d1ef895de416cef4803a74a0faed63501.zip
[AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU
Add two additional implicit arguments for OpenCL for the AMDGPU target using the AMDHSA runtime to support device enqueue.

Differential Revision: https://reviews.llvm.org/D44697
llvm-svn: 328351
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 10
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll | 4
2 files changed, 7 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index a2ee2bad848..cfe77ffd988 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
; MESA: kernarg_segment_byte_size = 16
; HSA: s_load_dword s0, s[4:5], 0x0
@@ -46,7 +46,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
; MESA: kernarg_segment_byte_size = 464
; HSA: s_load_dword s0, s[4:5], 0x1c
@@ -106,7 +106,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
@@ -132,7 +132,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
; MESA: kernarg_segment_byte_size = 464
; HSA: s_add_u32 s6, s4, 0x70
@@ -232,5 +232,5 @@ declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
+attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index 6ece8be0ec6..772c155ea52 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 48
+; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@@ -79,4 +79,4 @@ declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="32" }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
OpenPOWER on IntegriCloud