summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
author Marek Olsak <marek.olsak@amd.com> 2019-01-15 02:13:18 +0000
committer Marek Olsak <marek.olsak@amd.com> 2019-01-15 02:13:18 +0000
commit 33eb4d947d82de533d132598b3d568363170af3d (patch)
tree 4f4ac5c08a982c0726d99c8329d4597120070011 /llvm/test/CodeGen/AMDGPU
parent f793fe14022b727502a9aaba4fd3aabcde737155 (diff)
download bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.tar.gz
bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.zip
AMDGPU: Add a fast path for icmp.i1(src, false, NE)
Summary: This allows moving the condition from the intrinsic to the standard ICmp opcode, so that LLVM can do simplifications on it. The icmp.i1 intrinsic is an identity for retrieving the SGPR mask. We can also get the mask from `and i1`, `or i1`, and `xor i1`.
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D52060
llvm-svn: 351150
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll 18
1 files changed, 18 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
index 584fb43fd8e..06ac7da1288 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -4,6 +4,7 @@
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
; No crash on invalid input
; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
@@ -314,4 +315,21 @@ define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
ret void
}
+; GCN-LABEL: {{^}}v_icmp_i1_ne0:
+; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
+; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
+; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
+; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) { ; kernel exercising the icmp.i1(src, false, NE) fast path named in the commit title
+ %c0 = icmp ugt i32 %a, 1 ; first i1 condition: %a > 1 (unsigned)
+ %c1 = icmp ugt i32 %b, 2 ; second i1 condition: %b > 2 (unsigned)
+ %src = and i1 %c0, %c1 ; i1 source formed with `and`; the FileCheck patterns above expect this to become a single s_and_b64
+ %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) ; compare %src against false; condition code 33 is presumably NE, per the commit title - confirm against the predicate enum
+ store i64 %result, i64 addrspace(1)* %out ; write the 64-bit intrinsic result to %out
+ ret void
+}
+
attributes #0 = { nounwind readnone convergent }
OpenPOWER on IntegriCloud