diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-06-21 22:30:01 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-06-21 22:30:01 +0000 |
commit | a8b26936d051d49843ae79f86297ba98e73b1034 (patch) | |
tree | 2525165249a956c36b6fa76a3b503f1210024adf /llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll | |
parent | 705f798bff394f8f4372d917dfb93fac6017fe34 (diff) | |
download | bcm5719-llvm-a8b26936d051d49843ae79f86297ba98e73b1034.tar.gz bcm5719-llvm-a8b26936d051d49843ae79f86297ba98e73b1034.zip |
[AMDGPU] Combine add and adde, sub and sube
If one of the arguments of adde/sube is zero, we can fold another
add/sub into it.
Differential Revision: https://reviews.llvm.org/D34374
llvm-svn: 305964
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll | 80 |
1 file changed, 80 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 06cc7fcad06..6026a047d88 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -36,6 +36,86 @@ bb:
   ret void
 }
 
+; GCN-LABEL: {{^}}add_adde:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_add
+
+define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = zext i1 %cmp to i32
+  %adde = add i32 %v, %ext
+  %add2 = add i32 %adde, %a
+  store i32 %add2, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}adde_add:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_add
+
+define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %v, %a
+  %adde = add i32 %add, %ext
+  store i32 %adde, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}sub_sube:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_sub
+
+define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = sext i1 %cmp to i32
+  %adde = add i32 %v, %ext
+  %sub = sub i32 %adde, %a
+  store i32 %sub, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}sube_sub:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_sub
+
+define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = sext i1 %cmp to i32
+  %sub = sub i32 %v, %a
+  %adde = add i32 %sub, %ext
+  store i32 %adde, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
 
```