summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Yaxun Liu <Yaxun.Liu@amd.com> 2017-10-30 14:30:28 +0000
committer: Yaxun Liu <Yaxun.Liu@amd.com> 2017-10-30 14:30:28 +0000
commit: c928f2a6d425b00f1023b79dcdeb5cb6544fc487 (patch)
tree: e1398addd825bdf9a955c5ed96517c0ea9813ce5 /llvm/test
parent: e56e9a4fef89658f53beeb32e23adc4fc80aca6c (diff)
download: bcm5719-llvm-c928f2a6d425b00f1023b79dcdeb5cb6544fc487.tar.gz
download: bcm5719-llvm-c928f2a6d425b00f1023b79dcdeb5cb6544fc487.zip
[AMDGPU] Emit metadata for hidden arguments for kernel enqueue
Identifies kernels which perform device-side kernel enqueues and emits metadata for the associated hidden kernel arguments. Such kernels are marked with the calls-enqueue-kernel function attribute by the AMDGPUOpenCLEnqueueKernelLowering pass, and later on the hidden kernel argument metadata HiddenDefaultQueue and HiddenCompletionAction are emitted for them. Differential Revision: https://reviews.llvm.org/D39255 llvm-svn: 316907
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll 17
-rw-r--r-- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll 96
-rw-r--r-- llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll 47
3 files changed, 159 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
index b1b83c2b4a1..a54453541de 100644
--- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
@@ -9,7 +9,21 @@ target triple = "amdgcn-amdhsa-amd-opencl"
%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque
-define amdgpu_kernel void @test(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+; CHECK: define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space
+define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+ !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+ ret void
+}
+
+; CHECK: define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER:[0-9]+]]
+define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+ !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+ call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
+ ret void
+}
+
+; CHECK: define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER]]
+define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
%block = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, align 8
@@ -77,6 +91,7 @@ entry:
ret void
}
+; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle"
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
new file mode 100644
index 00000000000..c5121a7fd3b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
@@ -0,0 +1,96 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK-NOT: Printf:
+; CHECK: Kernels:
+
+; CHECK: - Name: test_non_enqueue_kernel_caller
+; CHECK-NEXT: SymbolName: 'test_non_enqueue_kernel_caller@kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NOT: ValueKind: HiddenNone
+; CHECK-NOT: ValueKind: HiddenDefaultQueue
+; CHECK-NOT: ValueKind: HiddenCompletionAction
+define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: - Name: test_enqueue_kernel_caller
+; CHECK-NEXT: SymbolName: 'test_enqueue_kernel_caller@kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenDefaultQueue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenCompletionAction
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+attributes #0 = { "calls-enqueue-kernel" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
index 4ac9bacebe1..ea47f83aef3 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -51,6 +51,8 @@
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NOT: ValueKind: HiddenDefaultQueue
+; CHECK-NOT: ValueKind: HiddenCompletionAction
define amdgpu_kernel void @test_char(i8 %a)
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
@@ -1267,7 +1269,52 @@ define amdgpu_kernel void @__test_block_invoke_kernel(
ret void
}
+; CHECK: - Name: test_enqueue_kernel_caller
+; CHECK-NEXT: SymbolName: 'test_enqueue_kernel_caller@kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenDefaultQueue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenCompletionAction
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
+ !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+ ret void
+}
+
attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
+attributes #1 = { "calls-enqueue-kernel" }
!llvm.printf.fmts = !{!100, !101}
OpenPOWER on IntegriCloud