summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-06-21 22:30:01 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-06-21 22:30:01 +0000
commita8b26936d051d49843ae79f86297ba98e73b1034 (patch)
tree2525165249a956c36b6fa76a3b503f1210024adf /llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
parent705f798bff394f8f4372d917dfb93fac6017fe34 (diff)
downloadbcm5719-llvm-a8b26936d051d49843ae79f86297ba98e73b1034.tar.gz
bcm5719-llvm-a8b26936d051d49843ae79f86297ba98e73b1034.zip
[AMDGPU] Combine add and adde, sub and sube
If one of the arguments of adde/sube is zero we can fold another add/sub into it. Differential Revision: https://reviews.llvm.org/D34374 llvm-svn: 305964
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll80
1 files changed, 80 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 06cc7fcad06..6026a047d88 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -36,6 +36,86 @@ bb:
ret void
}
+; GCN-LABEL: {{^}}add_adde:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_add
+
+define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp = icmp ugt i32 %x, %y
+ %ext = zext i1 %cmp to i32
+ %adde = add i32 %v, %ext
+ %add2 = add i32 %adde, %a
+ store i32 %add2, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}adde_add:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_add
+
+define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp = icmp ugt i32 %x, %y
+ %ext = zext i1 %cmp to i32
+ %add = add i32 %v, %a
+ %adde = add i32 %add, %ext
+ store i32 %adde, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}sub_sube:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_sub
+
+define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %adde = add i32 %v, %ext
+ %sub = sub i32 %adde, %a
+ store i32 %sub, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}sube_sub:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+; GCN-NOT: v_sub
+
+define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %sub = sub i32 %v, %a
+ %adde = add i32 %sub, %ext
+ store i32 %adde, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
OpenPOWER on IntegriCloud