diff options
| author | Tony Tye <Tony.Tye@amd.com> | 2018-03-23 18:58:47 +0000 |
|---|---|---|
| committer | Tony Tye <Tony.Tye@amd.com> | 2018-03-23 18:58:47 +0000 |
| commit | 88441a3d1ef895de416cef4803a74a0faed63501 (patch) | |
| tree | 62b581c5d08ccfc43d9a175a285718d4113103d3 | |
| parent | 68e11a6ecac7a37a5772d8c5c9c56c19614fc7f0 (diff) | |
| download | bcm5719-llvm-88441a3d1ef895de416cef4803a74a0faed63501.tar.gz bcm5719-llvm-88441a3d1ef895de416cef4803a74a0faed63501.zip | |
[AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU
Add two additional implicit arguments for OpenCL for the AMDGPU target using the AMDHSA runtime to support device enqueue.
Differential Revision: https://reviews.llvm.org/D44697
llvm-svn: 328351
| -rw-r--r-- | llvm/docs/AMDGPUUsage.rst | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll | 4 |
3 files changed, 15 insertions, 11 deletions
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 6bb64c9e061..0bf9b98d32b 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -3801,10 +3801,14 @@ When the language is OpenCL the following differences occur:
 Position Byte Byte      Description
          Size Alignment
 ======== ==== ========= ===========================================
-0        8    8         OpenCL Global Offset X
-1        8    8         OpenCL Global Offset Y
-2        8    8         OpenCL Global Offset Z
-3        8    8         OpenCL printf buffer
+1        8    8         OpenCL Global Offset X
+2        8    8         OpenCL Global Offset Y
+3        8    8         OpenCL Global Offset Z
+4        8    8         OpenCL address of printf buffer
+5        8    8         OpenCL address of virtual queue used by
+                        enqueue_kernel.
+6        8    8         OpenCL address of AqlWrap struct used by
+                        enqueue_kernel.
 ======== ==== ========= ===========================================
 .. _amdgpu-hcc:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index a2ee2bad848..cfe77ffd988 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 ; HSA: s_load_dword s0, s[4:5], 0x0
@@ -46,7 +46,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 ; HSA: s_load_dword s0, s[4:5], 0x1c
@@ -106,7 +106,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 ; GCN: s_mov_b64 s[6:7], s[4:5]
 ; GCN: s_swappc_b64
@@ -132,7 +132,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 ; HSA: s_add_u32 s6, s4, 0x70
@@ -232,5 +232,5 @@ declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
 attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
+attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index 6ece8be0ec6..772c155ea52 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x
 }
 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 48
+; HSA: kernarg_segment_byte_size = 64
 ; OS-MESA3D: kernarg_segment_byte_size = 28
 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@@ -79,4 +79,4 @@ declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="32" }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }

