AMDGPU: Add a fast path for icmp.i1(src, false, NE)

Summary: This allows moving the condition out of the intrinsic and into the standard ICmp opcode, so that LLVM can perform simplifications on it. In the form icmp.i1(src, false, NE), the icmp.i1 intrinsic is an identity operation that simply retrieves the SGPR mask. The mask can also be obtained when the source is the result of an `and i1`, `or i1`, or `xor i1`.
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D52060
llvm-svn: 351150
author: Marek Olsak <marek.olsak@amd.com> 2019-01-15 02:13:18 +0000
committer: Marek Olsak <marek.olsak@amd.com> 2019-01-15 02:13:18 +0000
commit: 33eb4d947d82de533d132598b3d568363170af3d (patch)
tree: 4f4ac5c08a982c0726d99c8329d4597120070011 /llvm/test/CodeGen/AMDGPU
parent: f793fe14022b727502a9aaba4fd3aabcde737155 (diff)
download: bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.tar.gz
bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.zip
1 file changed, 18 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
index 584fb43fd8e..06ac7da1288 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -4,6 +4,7 @@
 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
 
 ; No crash on invalid input
 ; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
@@ -314,4 +315,21 @@ define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
   ret void
 }
 
+; GCN-LABEL: {{^}}v_icmp_i1_ne0:
+; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
+; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
+; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
+; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) { ; kernel exercising llvm.amdgcn.icmp.i1 with an i1 source compared against false
+  %c0 = icmp ugt i32 %a, 1 ; c0 = (a > 1), unsigned compare
+  %c1 = icmp ugt i32 %b, 2 ; c1 = (b > 2), unsigned compare
+  %src = and i1 %c0, %c1 ; the i1 source operand is the `and` of the two compare results
+  %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) ; src vs. false; condition code 33 is presumably NE (matches the commit title) -- confirm against CmpInst::Predicate
+  store i64 %result, i64 addrspace(1)* %out ; write the i64 intrinsic result to %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone convergent }
author	Marek Olsak <marek.olsak@amd.com>	2019-01-15 02:13:18 +0000
committer	Marek Olsak <marek.olsak@amd.com>	2019-01-15 02:13:18 +0000
commit	33eb4d947d82de533d132598b3d568363170af3d (patch)
tree	4f4ac5c08a982c0726d99c8329d4597120070011 /llvm/test/CodeGen/AMDGPU
parent	f793fe14022b727502a9aaba4fd3aabcde737155 (diff)
download	bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.tar.gz bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.zip