summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
diff options
context:
space:
mode:
authorJiangning Liu <jiangning.liu@arm.com>2014-09-05 02:55:24 +0000
committerJiangning Liu <jiangning.liu@arm.com>2014-09-05 02:55:24 +0000
commit1a486da543b7e4ea7a635f8f467542fd9398772c (patch)
tree3362d36ec17da7f970822e079e517cd1c7fabe6b /llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
parent2feff785fc5de96968386b8949a34abb6a6a9c47 (diff)
downloadbcm5719-llvm-1a486da543b7e4ea7a635f8f467542fd9398772c.tar.gz
bcm5719-llvm-1a486da543b7e4ea7a635f8f467542fd9398772c.zip
[AArch64] Add pass to enable additional comparison optimizations by CSE.
Patched by Sergey Dmitrouk. This pass tries to make consecutive compares of values use same operands to allow CSE pass to remove duplicated instructions. For this it analyzes branches and adjusts comparisons with immediate values by converting: GE -> GT GT -> GE LT -> LE LE -> LT and adjusting immediate values appropriately. It basically corrects two immediate values towards each other to make them equal. llvm-svn: 217220
Diffstat (limited to 'llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll')
-rw-r--r--llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll349
1 files changed, 349 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
new file mode 100644
index 00000000000..b8bce3f7b13
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -0,0 +1,349 @@
+; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; marked as external to prevent possible optimizations
+@a = external global i32
+@b = external global i32
+@c = external global i32
+@d = external global i32
+
+; (a > 10 && b == c) || (a >= 10 && b == d)
+define i32 @combine_gt_ge_10() #0 {
+; CHECK-LABEL: combine_gt_ge_10
+; CHECK: cmp
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.lt
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp sgt i32 %0, 10
+ br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true: ; preds = %entry
+ %1 = load i32* @b, align 4
+ %2 = load i32* @c, align 4
+ %cmp1 = icmp eq i32 %1, %2
+ br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false: ; preds = %entry
+ %cmp2 = icmp sgt i32 %0, 9
+ br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
+ %3 = load i32* @b, align 4
+ %4 = load i32* @d, align 4
+ %cmp4 = icmp eq i32 %3, %4
+ br i1 %cmp4, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true3, %lor.lhs.false
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
+; (a > 5 && b == c) || (a < 5 && b == d)
+define i32 @combine_gt_lt_5() #0 {
+; CHECK-LABEL: combine_gt_lt_5
+; CHECK: cmp
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.ge
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp sgt i32 %0, 5
+ br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true: ; preds = %entry
+ %1 = load i32* @b, align 4
+ %2 = load i32* @c, align 4
+ %cmp1 = icmp eq i32 %1, %2
+ br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false: ; preds = %entry
+ %cmp2 = icmp slt i32 %0, 5
+ br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3: ; preds = %lor.lhs.false
+ %3 = load i32* @b, align 4
+ %4 = load i32* @d, align 4
+ %cmp4 = icmp eq i32 %3, %4
+ br i1 %cmp4, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
+; (a < 5 && b == c) || (a <= 5 && b == d)
+define i32 @combine_lt_ge_5() #0 {
+; CHECK-LABEL: combine_lt_ge_5
+; CHECK: cmp
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.gt
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp slt i32 %0, 5
+ br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true: ; preds = %entry
+ %1 = load i32* @b, align 4
+ %2 = load i32* @c, align 4
+ %cmp1 = icmp eq i32 %1, %2
+ br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false: ; preds = %entry
+ %cmp2 = icmp slt i32 %0, 6
+ br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
+ %3 = load i32* @b, align 4
+ %4 = load i32* @d, align 4
+ %cmp4 = icmp eq i32 %3, %4
+ br i1 %cmp4, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true3, %lor.lhs.false
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
+; (a < 5 && b == c) || (a > 5 && b == d)
+define i32 @combine_lt_gt_5() #0 {
+; CHECK-LABEL: combine_lt_gt_5
+; CHECK: cmp
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.le
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp slt i32 %0, 5
+ br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true: ; preds = %entry
+ %1 = load i32* @b, align 4
+ %2 = load i32* @c, align 4
+ %cmp1 = icmp eq i32 %1, %2
+ br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false: ; preds = %entry
+ %cmp2 = icmp sgt i32 %0, 5
+ br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3: ; preds = %lor.lhs.false
+ %3 = load i32* @b, align 4
+ %4 = load i32* @d, align 4
+ %cmp4 = icmp eq i32 %3, %4
+ br i1 %cmp4, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
+; (a > -5 && b == c) || (a < -5 && b == d)
+define i32 @combine_gt_lt_n5() #0 {
+; CHECK-LABEL: combine_gt_lt_n5
+; CHECK: cmn
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmn
+; CHECK: b.ge
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp sgt i32 %0, -5
+ br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true: ; preds = %entry
+ %1 = load i32* @b, align 4
+ %2 = load i32* @c, align 4
+ %cmp1 = icmp eq i32 %1, %2
+ br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false: ; preds = %entry
+ %cmp2 = icmp slt i32 %0, -5
+ br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3: ; preds = %lor.lhs.false
+ %3 = load i32* @b, align 4
+ %4 = load i32* @d, align 4
+ %cmp4 = icmp eq i32 %3, %4
+ br i1 %cmp4, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
+; (a < -5 && b == c) || (a > -5 && b == d)
+define i32 @combine_lt_gt_n5() #0 {
+; CHECK-LABEL: combine_lt_gt_n5
+; CHECK: cmn
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmn
+; CHECK: b.le
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp slt i32 %0, -5
+ br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true: ; preds = %entry
+ %1 = load i32* @b, align 4
+ %2 = load i32* @c, align 4
+ %cmp1 = icmp eq i32 %1, %2
+ br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false: ; preds = %entry
+ %cmp2 = icmp sgt i32 %0, -5
+ br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3: ; preds = %lor.lhs.false
+ %3 = load i32* @b, align 4
+ %4 = load i32* @d, align 4
+ %cmp4 = icmp eq i32 %3, %4
+ br i1 %cmp4, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
+%struct.Struct = type { i64, i64 }
+
+@glob = internal unnamed_addr global %struct.Struct* null, align 8
+
+declare %struct.Struct* @Update(%struct.Struct*) #1
+
+; no checks for this case, it just should be processed without errors
+define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 {
+entry:
+ %size = getelementptr inbounds %struct.Struct* %hdCall, i64 0, i32 0
+ %0 = load i64* %size, align 8
+ br label %land.rhs
+
+land.rhs:
+ %rp.06 = phi i64 [ %0, %entry ], [ %sub, %while.body ]
+ %1 = load i64* inttoptr (i64 24 to i64*), align 8
+ %cmp2 = icmp sgt i64 %1, 0
+ br i1 %cmp2, label %while.body, label %while.end
+
+while.body:
+ %2 = load %struct.Struct** @glob, align 8
+ %call = tail call %struct.Struct* @Update(%struct.Struct* %2) #2
+ %sub = add nsw i64 %rp.06, -2
+ %cmp = icmp slt i64 %0, %rp.06
+ br i1 %cmp, label %land.rhs, label %while.end
+
+while.end:
+ ret void
+}
+
+; undefined external to prevent possible optimizations
+declare void @do_something() #1
+
+define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
+; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ
+; CHECK: cmn
+; CHECK: b.gt
+; CHECK: cmp
+; CHECK: b.gt
+entry:
+ %0 = load i32* @a, align 4
+ %cmp4 = icmp slt i32 %0, -1
+ br i1 %cmp4, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body, %while.body.preheader
+ %i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
+ tail call void @do_something() #2
+ %inc = add nsw i32 %i.05, 1
+ %cmp = icmp slt i32 %i.05, 0
+ br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge: ; preds = %while.body
+ %.pre = load i32* @a, align 4
+ br label %while.end
+
+while.end: ; preds = %while.cond.while.end_crit_edge, %entry
+ %1 = phi i32 [ %.pre, %while.cond.while.end_crit_edge ], [ %0, %entry ]
+ %cmp1 = icmp slt i32 %1, 2
+ br i1 %cmp1, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %while.end
+ %2 = load i32* @b, align 4
+ %3 = load i32* @d, align 4
+ %cmp2 = icmp eq i32 %2, %3
+ br i1 %cmp2, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true, %while.end
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
+define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
+; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other
+; CHECK: cmp
+; CHECK: b.gt
+; CHECK: cmn
+; CHECK: b.lt
+entry:
+ %0 = load i32* @a, align 4
+ %cmp4 = icmp slt i32 %0, 1
+ br i1 %cmp4, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body, %while.body.preheader
+ %i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
+ tail call void @do_something() #2
+ %inc = add nsw i32 %i.05, 1
+ %cmp = icmp slt i32 %i.05, 0
+ br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %1 = load i32* @c, align 4
+ %cmp1 = icmp sgt i32 %1, -3
+ br i1 %cmp1, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %while.end
+ %2 = load i32* @b, align 4
+ %3 = load i32* @d, align 4
+ %cmp2 = icmp eq i32 %2, %3
+ br i1 %cmp2, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true, %while.end
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
+ ret i32 %retval.0
+}
OpenPOWER on IntegriCloud