summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorDehao Chen <dehao@google.com>2016-10-27 16:30:08 +0000
committerDehao Chen <dehao@google.com>2016-10-27 16:30:08 +0000
commitb94c09baa058e57b1e931746745050e19785ef30 (patch)
treea2d57bc5d38400fcb744b91d3cb49282db424056 /llvm/test
parent9c9d9cdcf8e996455318bbe334d1d36622338b70 (diff)
downloadbcm5719-llvm-b94c09baa058e57b1e931746745050e19785ef30.tar.gz
bcm5719-llvm-b94c09baa058e57b1e931746745050e19785ef30.zip
Add Loop Sink pass to reverse the LICM based on basic block frequency.
Summary: LICM may hoist instructions to the preheader speculatively. Before code generation, we need to sink the hoisted instructions back into the loop if it's beneficial. This pass is the reverse of LICM: it looks at instructions in the preheader and sinks an instruction into basic blocks inside the loop body if the basic block frequency is smaller than the preheader frequency. Reviewers: hfinkel, davidxl, chandlerc Subscribers: anna, modocache, mgorny, beanz, reames, dberlin, chandlerc, mcrosier, junbuml, sanjoy, mzolotukhin, llvm-commits Differential Revision: https://reviews.llvm.org/D22778 llvm-svn: 285308
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Transforms/LICM/loopsink.ll286
-rw-r--r--llvm/test/Transforms/LICM/sink.ll60
2 files changed, 346 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LICM/loopsink.ll b/llvm/test/Transforms/LICM/loopsink.ll
new file mode 100644
index 00000000000..564a35b575e
--- /dev/null
+++ b/llvm/test/Transforms/LICM/loopsink.ll
@@ -0,0 +1,286 @@
+; RUN: opt -S -loop-sink < %s | FileCheck %s
+
+@g = global i32 0, align 4
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; preheader: 1000
+; b2: 15
+; b3: 7
+; b4: 7
+; Sink load to b2
+; CHECK: t1
+; CHECK: .b2:
+; CHECK: load i32, i32* @g
+; CHECK: .b3:
+; CHECK-NOT: load i32, i32* @g
+define i32 @t1(i32, i32) #0 { ; %invariant is used in .b3 and .b4, both reached only through .b2
+ %3 = icmp eq i32 %1, 0
+ br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
+
+.preheader:
+ %invariant = load i32, i32* @g ; the load this test tracks; it starts out in the preheader
+ br label %.b1
+
+.b1:
+ %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+ %c1 = icmp sgt i32 %iv, %0
+ br i1 %c1, label %.b2, label %.b6, !prof !1 ; !1 weights: .b2 = 1, .b6 = 2000
+
+.b2:
+ %c2 = icmp sgt i32 %iv, 1
+ br i1 %c2, label %.b3, label %.b4 ; no profile metadata on this branch
+
+.b3:
+ %t3 = sub nsw i32 %invariant, %iv ; use of %invariant
+ br label %.b5
+
+.b4:
+ %t4 = add nsw i32 %invariant, %iv ; use of %invariant
+ br label %.b5
+
+.b5:
+ %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+ %t5 = mul nsw i32 %p5, 5
+ br label %.b7
+
+.b6:
+ %t6 = add nsw i32 %iv, 100 ; does not use %invariant
+ br label %.b7
+
+.b7:
+ %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+ %t7 = add nuw nsw i32 %iv, 1
+ %c7 = icmp eq i32 %t7, %p7
+ br i1 %c7, label %.b1, label %.exit, !prof !3 ; !3 weights: back edge = 100, exit = 1
+
+.exit:
+ ret i32 10
+}
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; preheader: 500
+; b1: 16016
+; b3: 8
+; b6: 8
+; Sink load to b3 and b6
+; CHECK: t2
+; CHECK: .preheader:
+; CHECK-NOT: load i32, i32* @g
+; CHECK: .b3:
+; CHECK: load i32, i32* @g
+; CHECK: .b4:
+; CHECK: .b6:
+; CHECK: load i32, i32* @g
+; CHECK: .b7:
+define i32 @t2(i32, i32) #0 { ; %invariant is used in .b3 and .b6, which lie on different arms of the .b1 branch
+ %3 = icmp eq i32 %1, 0
+ br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
+
+.preheader:
+ %invariant = load i32, i32* @g ; the load this test tracks; it starts out in the preheader
+ br label %.b1
+
+.b1:
+ %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+ %c1 = icmp sgt i32 %iv, %0
+ br i1 %c1, label %.b2, label %.b6, !prof !2 ; !2 weights: .b2 = 2000, .b6 = 1
+
+.b2:
+ %c2 = icmp sgt i32 %iv, 1
+ br i1 %c2, label %.b3, label %.b4, !prof !1 ; !1 weights: .b3 = 1, .b4 = 2000
+
+.b3:
+ %t3 = sub nsw i32 %invariant, %iv ; use of %invariant
+ br label %.b5
+
+.b4:
+ %t4 = add nsw i32 5, %iv ; does not use %invariant
+ br label %.b5
+
+.b5:
+ %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+ %t5 = mul nsw i32 %p5, 5
+ br label %.b7
+
+.b6:
+ %t6 = add nsw i32 %iv, %invariant ; use of %invariant
+ br label %.b7
+
+.b7:
+ %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+ %t7 = add nuw nsw i32 %iv, 1
+ %c7 = icmp eq i32 %t7, %p7
+ br i1 %c7, label %.b1, label %.exit, !prof !3 ; !3 weights: back edge = 100, exit = 1
+
+.exit:
+ ret i32 10
+}
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; preheader: 500
+; b3: 8
+; b5: 16008
+; Do not sink load from preheader.
+; CHECK: t3
+; CHECK: .preheader:
+; CHECK: load i32, i32* @g
+; CHECK: .b1:
+; CHECK-NOT: load i32, i32* @g
+define i32 @t3(i32, i32) #0 { ; %invariant is used in .b3 and in .b5, the join block of .b3 and .b4
+ %3 = icmp eq i32 %1, 0
+ br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
+
+.preheader:
+ %invariant = load i32, i32* @g ; the load this test tracks; it starts out in the preheader
+ br label %.b1
+
+.b1:
+ %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+ %c1 = icmp sgt i32 %iv, %0
+ br i1 %c1, label %.b2, label %.b6, !prof !2 ; !2 weights: .b2 = 2000, .b6 = 1
+
+.b2:
+ %c2 = icmp sgt i32 %iv, 1
+ br i1 %c2, label %.b3, label %.b4, !prof !1 ; !1 weights: .b3 = 1, .b4 = 2000
+
+.b3:
+ %t3 = sub nsw i32 %invariant, %iv ; use of %invariant
+ br label %.b5
+
+.b4:
+ %t4 = add nsw i32 5, %iv ; does not use %invariant
+ br label %.b5
+
+.b5:
+ %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+ %t5 = mul nsw i32 %p5, %invariant ; use of %invariant in the block reached from both .b3 and .b4
+ br label %.b7
+
+.b6:
+ %t6 = add nsw i32 %iv, 5 ; does not use %invariant
+ br label %.b7
+
+.b7:
+ %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+ %t7 = add nuw nsw i32 %iv, 1
+ %c7 = icmp eq i32 %t7, %p7
+ br i1 %c7, label %.b1, label %.exit, !prof !3 ; !3 weights: back edge = 100, exit = 1
+
+.exit:
+ ret i32 10
+}
+
+; Single-BB loop whose back edge is rarely taken (!1: 1 vs. 2000 for the exit), i.e. <=1 avg trip count: sink load to b1
+; CHECK: t4
+; CHECK: .preheader:
+; CHECK-NOT: load i32, i32* @g
+; CHECK: .b1:
+; CHECK: load i32, i32* @g
+; CHECK: .exit:
+define i32 @t4(i32, i32) #0 {
+.preheader:
+ %invariant = load i32, i32* @g ; starts out in the preheader, which here is also the entry block
+ br label %.b1
+
+.b1:
+ %iv = phi i32 [ %t1, %.b1 ], [ 0, %.preheader ]
+ %t1 = add nsw i32 %invariant, %iv ; only use of %invariant
+ %c1 = icmp sgt i32 %iv, %0
+ br i1 %c1, label %.b1, label %.exit, !prof !1 ; !1 weights: back edge = 1, exit = 2000
+
+.exit:
+ ret i32 10
+}
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; preheader: 1000
+; b2: 15
+; b3: 7
+; b4: 7
+; There is alias store in loop, do not sink load
+; CHECK: t5
+; CHECK: .preheader:
+; CHECK: load i32, i32* @g
+; CHECK: .b1:
+; CHECK-NOT: load i32, i32* @g
+define i32 @t5(i32, i32*) #0 { ; same shape and profile as @t1, except .b6 calls @foo; the i32* argument is never used
+ %3 = icmp eq i32 %0, 0
+ br i1 %3, label %.exit, label %.preheader ; bypass the loop when the first argument is 0
+
+.preheader:
+ %invariant = load i32, i32* @g ; the load this test tracks; it starts out in the preheader
+ br label %.b1
+
+.b1:
+ %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+ %c1 = icmp sgt i32 %iv, %0
+ br i1 %c1, label %.b2, label %.b6, !prof !1 ; !1 weights: .b2 = 1, .b6 = 2000
+
+.b2:
+ %c2 = icmp sgt i32 %iv, 1
+ br i1 %c2, label %.b3, label %.b4 ; no profile metadata on this branch
+
+.b3:
+ %t3 = sub nsw i32 %invariant, %iv ; use of %invariant
+ br label %.b5
+
+.b4:
+ %t4 = add nsw i32 %invariant, %iv ; use of %invariant
+ br label %.b5
+
+.b5:
+ %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+ %t5 = mul nsw i32 %p5, 5
+ br label %.b7
+
+.b6:
+ %t6 = call i32 @foo() ; call to an external function; presumably the "alias store" the test description refers to
+ br label %.b7
+
+.b7:
+ %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+ %t7 = add nuw nsw i32 %iv, 1
+ %c7 = icmp eq i32 %t7, %p7
+ br i1 %c7, label %.b1, label %.exit, !prof !3 ; !3 weights: back edge = 100, exit = 1
+
+.exit:
+ ret i32 10
+}
+
+declare i32 @foo() ; external callee used by @t5; declared without any attributes
+
+!1 = !{!"branch_weights", i32 1, i32 2000} ; first successor weight 1, second successor weight 2000
+!2 = !{!"branch_weights", i32 2000, i32 1} ; first successor weight 2000, second successor weight 1
+!3 = !{!"branch_weights", i32 100, i32 1} ; used on the loop latches: back edge weight 100, exit weight 1
diff --git a/llvm/test/Transforms/LICM/sink.ll b/llvm/test/Transforms/LICM/sink.ll
new file mode 100644
index 00000000000..0eceb3df79f
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=CHECK-LICM
+; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK
+
+; Original source code:
+; int g;
+; int foo(int p, int x) {
+; for (int i = 0; i != x; i++)
+; if (__builtin_expect(i == p, 0)) {
+; x += g; x *= g;
+; }
+; return x;
+; }
+;
+; LICM hoists the load of global value g to the preheader; loop-sink should then sink it back into the cold .then block.
+
+@g = global i32 0, align 4
+
+define i32 @foo(i32, i32) #0 { ; IR for the C function foo(p, x) quoted at the top of this file
+ %3 = icmp eq i32 %1, 0
+ br i1 %3, label %._crit_edge, label %.lr.ph.preheader ; skip the loop when x is 0
+
+.lr.ph.preheader:
+ br label %.lr.ph
+
+; CHECK-LICM: .lr.ph.preheader:
+; CHECK-LICM: load i32, i32* @g
+; CHECK-LICM: br label %.lr.ph
+
+.lr.ph:
+ %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ]
+ %.012 = phi i32 [ %.1, %.combine ], [ %1, %.lr.ph.preheader ]
+ %4 = icmp eq i32 %.03, %0
+ br i1 %4, label %.then, label %.combine, !prof !1 ; !1 weights: .then = 1, .combine = 2000
+
+.then:
+ %5 = load i32, i32* @g, align 4 ; the load of g; in the input it sits inside the rarely taken .then block
+ %6 = add nsw i32 %5, %.012
+ %7 = mul nsw i32 %6, %5
+ br label %.combine
+
+; CHECK-SINK: .then:
+; CHECK-SINK: load i32, i32* @g
+; CHECK-SINK: br label %.combine
+
+.combine:
+ %.1 = phi i32 [ %7, %.then ], [ %.012, %.lr.ph ]
+ %8 = add nuw nsw i32 %.03, 1
+ %9 = icmp eq i32 %8, %.1
+ br i1 %9, label %._crit_edge.loopexit, label %.lr.ph ; loop latch; no profile metadata on this branch
+
+._crit_edge.loopexit:
+ %.1.lcssa = phi i32 [ %.1, %.combine ]
+ br label %._crit_edge
+
+._crit_edge:
+ %.01.lcssa = phi i32 [ 0, %2 ], [ %.1.lcssa, %._crit_edge.loopexit ] ; %2 is the unnamed entry block
+ ret i32 %.01.lcssa
+}
+
+!1 = !{!"branch_weights", i32 1, i32 2000} ; first successor (.then) weight 1, second successor (.combine) weight 2000
OpenPOWER on IntegriCloud