author    Yaxun Liu <Yaxun.Liu@amd.com>   2018-04-11 14:46:15 +0000
committer Yaxun Liu <Yaxun.Liu@amd.com>   2018-04-11 14:46:15 +0000
commit    9381ae9791d57dd09fa10c22d52a17ca2bbcd4b2 (patch)
tree      bf8c73e25d171d2d6f509c9fbd925493a0e23e91 /llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
parent    2f326d453feea698996c1c3f104a92e4354cd40f (diff)
[AMDGPU] Fix lowering enqueue_kernel
Two issues were fixed:

1. The runtime has difficulty allocating memory for an external symbol of a
   kernel and setting that external symbol's address, so the runtime handle of
   an enqueued kernel is now an ordinary global variable. The runtime only
   needs to store the address of the loaded kernel into the handle, and this
   approach has been verified to work.

2. Handle the situation where __enqueue_kernel* gets inlined, in which case the
   enqueued kernel may be used through a constant expression instead of an
   instruction.

Differential Revision: https://reviews.llvm.org/D45187

llvm-svn: 329815
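For reference, the following is a minimal, hypothetical C++ sketch of the approach the commit message describes, not the actual AMDGPUOpenCLEnqueuedBlockLowering source. The helper name lowerEnqueuedKernel and the hard-coded address-space constant are illustrative, and exact LLVM C++ API signatures vary between releases.

// Sketch only: create an ordinary, null-initialized "<kernel>.runtime_handle"
// global in the AMDGPU global address space and redirect address-taking uses
// of the kernel to it. Replacing uses of the Function constant itself also
// rewrites uses that appear inside constant expressions, which covers the
// inlined __enqueue_kernel* case.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static void lowerEnqueuedKernel(Module &M, Function &F) {
  LLVMContext &Ctx = M.getContext();
  const unsigned GlobalAS = 1; // AMDGPU global address space (assumed here)

  // i8 addrspace(1)* slot, an ordinary global initialized to null (no longer
  // an external, externally_initialized constant), so the runtime can simply
  // store the loaded kernel's address into it.
  PointerType *HandleTy = PointerType::get(Type::getInt8Ty(Ctx), GlobalAS);
  std::string HandleName = (F.getName() + ".runtime_handle").str();
  auto *Handle = new GlobalVariable(
      M, HandleTy, /*isConstant=*/false, GlobalValue::ExternalLinkage,
      ConstantPointerNull::get(HandleTy), HandleName,
      /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, GlobalAS);

  // Redirect every use of the kernel's address (instructions and constant
  // expressions alike) to the handle.
  F.replaceAllUsesWith(ConstantExpr::getAddrSpaceCast(Handle, F.getType()));

  // Tag the kernel so later stages can associate it with its handle.
  F.addFnAttr("runtime-handle", HandleName);
}

A handle produced this way is what the updated CHECK lines below expect: an addrspace(1) global of type i8 addrspace(1)* initialized to null, rather than an external externally_initialized constant.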
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll | 21
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
index 67864ed90da..1e4b2ac8093 100644
--- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
@@ -1,9 +1,9 @@
; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s
-; CHECK: @__test_block_invoke_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
-; CHECK: @__test_block_invoke_2_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
-; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
-; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
+; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
+; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
+; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
+; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global i8 addrspace(1)* null
%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque
@@ -80,6 +80,19 @@ entry:
ret void
}
+; __enqueue_kernel* functions may get inlined
+; CHECK-LABEL: define amdgpu_kernel void @inlined_caller
+; CHECK-SAME: #[[AT_CALLER]]
+; CHECK-NOT: @__test_block_invoke_kernel
+; CHECK: load i64, i64 addrspace(1)* bitcast (i8 addrspace(1)* addrspace(1)* @__test_block_invoke_kernel.runtime_handle to i64 addrspace(1)*)
+define amdgpu_kernel void @inlined_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+ !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+entry:
+ %tmp = load i64, i64 addrspace(1)* addrspacecast (i64* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i64*) to i64 addrspace(1)*)
+ store i64 %tmp, i64 addrspace(1)* %c
+ ret void
+}
+
; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_kernel
; CHECK-SAME: #[[AT1:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0