diff options
| author | Marek Olsak <marek.olsak@amd.com> | 2019-01-15 02:13:18 +0000 |
|---|---|---|
| committer | Marek Olsak <marek.olsak@amd.com> | 2019-01-15 02:13:18 +0000 |
| commit | 33eb4d947d82de533d132598b3d568363170af3d (patch) | |
| tree | 4f4ac5c08a982c0726d99c8329d4597120070011 /llvm/test/CodeGen/AMDGPU | |
| parent | f793fe14022b727502a9aaba4fd3aabcde737155 (diff) | |
| download | bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.tar.gz bcm5719-llvm-33eb4d947d82de533d132598b3d568363170af3d.zip | |
AMDGPU: Add a fast path for icmp.i1(src, false, NE)
Summary:
This allows moving the condition from the intrinsic to the standard ICmp
opcode, so that LLVM can do simplifications on it. The icmp.i1 intrinsic
then acts as an identity that simply retrieves the SGPR mask.
We can also get the mask directly from the results of i1 `and`, `or`, and `xor` operations.
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D52060
llvm-svn: 351150
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll index 584fb43fd8e..06ac7da1288 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -4,6 +4,7 @@ declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0 +declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0 ; No crash on invalid input ; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc: @@ -314,4 +315,21 @@ define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) { ret void } +; GCN-LABEL: {{^}}v_icmp_i1_ne0: +; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]], +; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]], +; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]] +; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1 +; GCN-NEXT: v_mov_b32_e32 +; GCN-NEXT: v_mov_b32_e32 +; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2 +define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) { + %c0 = icmp ugt i32 %a, 1 + %c1 = icmp ugt i32 %b, 2 + %src = and i1 %c0, %c1 + %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) + store i64 %result, i64 addrspace(1)* %out + ret void +} + attributes #0 = { nounwind readnone convergent } |

