author     Matt Arsenault <Matthew.Arsenault@amd.com>  2015-11-06 18:01:57 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2015-11-06 18:01:57 +0000
commit     3931948bb651f60325cbba160a2ef63a03184e6f (patch)
tree       b5798ff1585345bce11e36bbc55f8fc20d930db2 /llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
parent     4dc7a5a5c6402a8110d6406cfb9de2cb778980ad (diff)
AMDGPU: Add pass to detect used kernel features
Mark kernels that use features requiring user SGPRs with kernel attributes. This needs to be known before instruction selection begins because it affects the kernel calling convention lowering. For now this only detects the workitem intrinsics.

llvm-svn: 252323
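For illustration, a minimal before/after sketch of what the annotation pass is expected to do, based on the test added below (the function name @use_tgid_y_example is hypothetical, and the exact attribute group numbering after the pass may differ):

declare i32 @llvm.r600.read.tgid.y() #0

define void @use_tgid_y_example(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.r600.read.tgid.y()   ; reads the work-group id in the y dimension
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

; Expected after running opt -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features:
; the kernel's attribute group gains the matching feature string, e.g.
; attributes #1 = { nounwind "amdgpu-work-group-id-y" }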
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll  | 193
1 file changed, 193 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
new file mode 100644
index 00000000000..b116c72322b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -0,0 +1,193 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA -check-prefix=ALL %s
+; RUN: opt -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s
+
+declare i32 @llvm.r600.read.tgid.x() #0
+declare i32 @llvm.r600.read.tgid.y() #0
+declare i32 @llvm.r600.read.tgid.z() #0
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.r600.read.tidig.y() #0
+declare i32 @llvm.r600.read.tidig.z() #0
+
+declare i32 @llvm.r600.read.local.size.x() #0
+declare i32 @llvm.r600.read.local.size.y() #0
+declare i32 @llvm.r600.read.local.size.z() #0
+
+declare i32 @llvm.r600.read.global.size.x() #0
+declare i32 @llvm.r600.read.global.size.y() #0
+declare i32 @llvm.r600.read.global.size.z() #0
+
+
+; ALL: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tgid.x()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
+define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tgid.y()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
+define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
+define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.x()
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
+define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tgid.z()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
+define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.x()
+ %val1 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
+define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.y()
+ %val1 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
+define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.x()
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ %val2 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ store volatile i32 %val2, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tidig.x()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
+define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tidig.y()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
+define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tidig.z()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.x()
+ %val1 = call i32 @llvm.r600.read.tgid.x()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
+define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.y()
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
+define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.x()
+ %val1 = call i32 @llvm.r600.read.tidig.y()
+ %val2 = call i32 @llvm.r600.read.tidig.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ store volatile i32 %val2, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
+define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.x()
+ %val1 = call i32 @llvm.r600.read.tidig.y()
+ %val2 = call i32 @llvm.r600.read.tidig.z()
+ %val3 = call i32 @llvm.r600.read.tgid.x()
+ %val4 = call i32 @llvm.r600.read.tgid.y()
+ %val5 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ store volatile i32 %val2, i32 addrspace(1)* %ptr
+ store volatile i32 %val3, i32 addrspace(1)* %ptr
+ store volatile i32 %val4, i32 addrspace(1)* %ptr
+ store volatile i32 %val5, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; HSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 {
+; NOHSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.local.size.x()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; HSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 {
+; NOHSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
+define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.local.size.y()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; HSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 {
+; NOHSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
+define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.local.size.z()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+; HSA: attributes #0 = { nounwind readnone }
+; HSA: attributes #1 = { nounwind }
+; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
+; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
+; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
+; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
+; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
+; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
+; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }