From a8b26936d051d49843ae79f86297ba98e73b1034 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 21 Jun 2017 22:30:01 +0000 Subject: [AMDGPU] Combine add and adde, sub and sube If one of the arguments of adde/sube is zero we can fold another add/sub into it. Differential Revision: https://reviews.llvm.org/D34374 llvm-svn: 305964 --- llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll') diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll index 06cc7fcad06..6026a047d88 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -36,6 +36,86 @@ bb: ret void } +; GCN-LABEL: {{^}}add_adde: +; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]] +; GCN-NOT: v_cndmask +; GCN-NOT: v_add + +define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %adde = add i32 %v, %ext + %add2 = add i32 %adde, %a + store i32 %add2, i32 addrspace(1)* %gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}adde_add: +; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]] +; GCN-NOT: v_cndmask +; GCN-NOT: v_add + +define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %add = add i32 %v, %a + %adde = add i32 %add, %ext + store i32 %adde, i32 addrspace(1)* %gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}sub_sube: +; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]] +; GCN-NOT: v_cndmask +; GCN-NOT: v_sub + +define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = sext i1 %cmp to i32 + %adde = add i32 %v, %ext + %sub = sub i32 %adde, %a + store i32 %sub, i32 addrspace(1)* %gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}sube_sub: +; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]] +; GCN-NOT: v_cndmask +; GCN-NOT: v_sub + +define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = sext i1 %cmp to i32 + %sub = sub i32 %v, %a + %adde = add i32 %sub, %ext + store i32 %adde, i32 addrspace(1)* %gep, align 4 + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 -- cgit v1.2.3