Add Loop Sink pass to reverse LICM based on basic block frequency.

Summary: LICM may hoist instructions to the preheader speculatively. Before code generation, we need to sink the hoisted instructions back into the loop if it's beneficial. This pass is the reverse of LICM: it looks at instructions in the preheader and sinks them into basic blocks inside the loop body if the basic block frequency is smaller than the preheader frequency. Reviewers: hfinkel, davidxl, chandlerc Subscribers: anna, modocache, mgorny, beanz, reames, dberlin, chandlerc, mcrosier, junbuml, sanjoy, mzolotukhin, llvm-commits Differential Revision: https://reviews.llvm.org/D22778 llvm-svn: 285308
author: Dehao Chen <dehao@google.com> 2016-10-27 16:30:08 +0000
committer: Dehao Chen <dehao@google.com> 2016-10-27 16:30:08 +0000
commit: b94c09baa058e57b1e931746745050e19785ef30 (patch)
tree: a2d57bc5d38400fcb744b91d3cb49282db424056 /llvm/test
parent: 9c9d9cdcf8e996455318bbe334d1d36622338b70 (diff)
download: bcm5719-llvm-b94c09baa058e57b1e931746745050e19785ef30.tar.gz
bcm5719-llvm-b94c09baa058e57b1e931746745050e19785ef30.zip
2 files changed, 346 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LICM/loopsink.ll b/llvm/test/Transforms/LICM/loopsink.ll
new file mode 100644
index 00000000000..564a35b575e
--- /dev/null
+++ b/llvm/test/Transforms/LICM/loopsink.ll
@@ -0,0 +1,286 @@
+; RUN: opt -S -loop-sink < %s | FileCheck %s
+
+@g = global i32 0, align 4
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; Block frequencies -- preheader: 1000
+; b2: 15
+; b3: 7
+; b4: 7
+; Sink load to b2 (a single copy there feeds both users, in b3 and b4)
+; CHECK: t1
+; CHECK: .b2:
+; CHECK: load i32, i32* @g
+; CHECK: .b3:
+; CHECK-NOT:  load i32, i32* @g
+define i32 @t1(i32, i32) #0 {
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %.exit, label %.preheader
+
+.preheader:
+  %invariant = load i32, i32* @g  ; loop-invariant load sitting in the preheader; the sinking candidate
+  br label %.b1
+
+.b1:
+  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+  %c1 = icmp sgt i32 %iv, %0
+  br i1 %c1, label %.b2, label %.b6, !prof !1  ; !1 = weights 1:2000, so b2 is rarely taken
+
+.b2:
+  %c2 = icmp sgt i32 %iv, 1
+  br i1 %c2, label %.b3, label %.b4
+
+.b3:
+  %t3 = sub nsw i32 %invariant, %iv  ; user of the load
+  br label %.b5
+
+.b4:
+  %t4 = add nsw i32 %invariant, %iv  ; user of the load
+  br label %.b5
+
+.b5:
+  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+  %t5 = mul nsw i32 %p5, 5
+  br label %.b7
+
+.b6:
+  %t6 = add nsw i32 %iv, 100
+  br label %.b7
+
+.b7:
+  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+  %t7 = add nuw nsw i32 %iv, 1
+  %c7 = icmp eq i32 %t7, %p7
+  br i1 %c7, label %.b1, label %.exit, !prof !3  ; !3 = weights 100:1 in favor of the back edge to b1
+
+.exit:
+  ret i32 10
+}
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; Block frequencies -- preheader: 500
+; b1: 16016
+; b3: 8
+; b6: 8
+; Sink load to b3 and b6 (the only two users; b4 uses the constant 5 instead of the load)
+; CHECK: t2
+; CHECK: .preheader:
+; CHECK-NOT: load i32, i32* @g
+; CHECK: .b3:
+; CHECK: load i32, i32* @g
+; CHECK: .b4:
+; CHECK: .b6:
+; CHECK: load i32, i32* @g
+; CHECK: .b7:
+define i32 @t2(i32, i32) #0 {
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %.exit, label %.preheader
+
+.preheader:
+  %invariant = load i32, i32* @g  ; loop-invariant load sitting in the preheader; the sinking candidate
+  br label %.b1
+
+.b1:
+  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+  %c1 = icmp sgt i32 %iv, %0
+  br i1 %c1, label %.b2, label %.b6, !prof !2  ; !2 = weights 2000:1, so b6 is rarely taken
+
+.b2:
+  %c2 = icmp sgt i32 %iv, 1
+  br i1 %c2, label %.b3, label %.b4, !prof !1  ; !1 = weights 1:2000, so b3 is rarely taken
+
+.b3:
+  %t3 = sub nsw i32 %invariant, %iv  ; user of the load
+  br label %.b5
+
+.b4:
+  %t4 = add nsw i32 5, %iv
+  br label %.b5
+
+.b5:
+  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+  %t5 = mul nsw i32 %p5, 5
+  br label %.b7
+
+.b6:
+  %t6 = add nsw i32 %iv, %invariant  ; user of the load
+  br label %.b7
+
+.b7:
+  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+  %t7 = add nuw nsw i32 %iv, 1
+  %c7 = icmp eq i32 %t7, %p7
+  br i1 %c7, label %.b1, label %.exit, !prof !3  ; !3 = weights 100:1 in favor of the back edge to b1
+
+.exit:
+  ret i32 10
+}
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; Block frequencies -- preheader: 500
+; b3: 8
+; b5: 16008
+; Do not sink load from preheader: b5 also uses it and is hotter than the preheader.
+; CHECK: t3
+; CHECK: .preheader:
+; CHECK: load i32, i32* @g
+; CHECK: .b1:
+; CHECK-NOT: load i32, i32* @g
+define i32 @t3(i32, i32) #0 {
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %.exit, label %.preheader
+
+.preheader:
+  %invariant = load i32, i32* @g  ; loop-invariant load; expected to stay in the preheader
+  br label %.b1
+
+.b1:
+  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+  %c1 = icmp sgt i32 %iv, %0
+  br i1 %c1, label %.b2, label %.b6, !prof !2  ; !2 = weights 2000:1, so b2 is the hot successor
+
+.b2:
+  %c2 = icmp sgt i32 %iv, 1
+  br i1 %c2, label %.b3, label %.b4, !prof !1  ; !1 = weights 1:2000, so b3 is rarely taken
+
+.b3:
+  %t3 = sub nsw i32 %invariant, %iv  ; user of the load (cold block)
+  br label %.b5
+
+.b4:
+  %t4 = add nsw i32 5, %iv
+  br label %.b5
+
+.b5:
+  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+  %t5 = mul nsw i32 %p5, %invariant  ; user of the load (hot block)
+  br label %.b7
+
+.b6:
+  %t6 = add nsw i32 %iv, 5
+  br label %.b7
+
+.b7:
+  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+  %t7 = add nuw nsw i32 %iv, 1
+  %c7 = icmp eq i32 %t7, %p7
+  br i1 %c7, label %.b1, label %.exit, !prof !3  ; !3 = weights 100:1 in favor of the back edge to b1
+
+.exit:
+  ret i32 10
+}
+
+; For single-BB loop with <=1 avg trip count, sink load to b1 (the preheader must no longer contain it)
+; CHECK: t4
+; CHECK: .preheader:
+; CHECK-NOT: load i32, i32* @g
+; CHECK: .b1:
+; CHECK: load i32, i32* @g
+; CHECK: .exit:
+define i32 @t4(i32, i32) #0 {
+.preheader:
+  %invariant = load i32, i32* @g  ; loop-invariant load sitting in the preheader; the sinking candidate
+  br label %.b1
+
+.b1:
+  %iv = phi i32 [ %t1, %.b1 ], [ 0, %.preheader ]
+  %t1 = add nsw i32 %invariant, %iv  ; only user of the load
+  %c1 = icmp sgt i32 %iv, %0
+  br i1 %c1, label %.b1, label %.exit, !prof !1  ; !1 = weights 1:2000, so the back edge is rarely taken
+
+.exit:
+  ret i32 10
+}
+
+;     b1
+;    /  \
+;   b2  b6
+;  /  \  |
+; b3  b4 |
+;  \  /  |
+;   b5   |
+;    \  /
+;     b7
+; Block frequencies -- preheader: 1000
+; b2: 15
+; b3: 7
+; b4: 7
+; b6 calls @foo(), which may store to @g (a possibly aliasing write inside the loop), so do not sink the load
+; CHECK: t5
+; CHECK: .preheader:
+; CHECK: load i32, i32* @g
+; CHECK: .b1:
+; CHECK-NOT: load i32, i32* @g
+define i32 @t5(i32, i32*) #0 {
+  %3 = icmp eq i32 %0, 0
+  br i1 %3, label %.exit, label %.preheader
+
+.preheader:
+  %invariant = load i32, i32* @g  ; expected to stay in the preheader
+  br label %.b1
+
+.b1:
+  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
+  %c1 = icmp sgt i32 %iv, %0
+  br i1 %c1, label %.b2, label %.b6, !prof !1  ; !1 = weights 1:2000, so b2 is rarely taken
+
+.b2:
+  %c2 = icmp sgt i32 %iv, 1
+  br i1 %c2, label %.b3, label %.b4
+
+.b3:
+  %t3 = sub nsw i32 %invariant, %iv  ; user of the load
+  br label %.b5
+
+.b4:
+  %t4 = add nsw i32 %invariant, %iv  ; user of the load
+  br label %.b5
+
+.b5:
+  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
+  %t5 = mul nsw i32 %p5, 5
+  br label %.b7
+
+.b6:
+  %t6 = call i32 @foo()  ; call to an external declaration with no attributes: may write @g
+  br label %.b7
+
+.b7:
+  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
+  %t7 = add nuw nsw i32 %iv, 1
+  %c7 = icmp eq i32 %t7, %p7
+  br i1 %c7, label %.b1, label %.exit, !prof !3  ; !3 = weights 100:1 in favor of the back edge to b1
+
+.exit:
+  ret i32 10
+}
+
+declare i32 @foo()  ; external function with no attributes; called from @t5
+
+!1 = !{!"branch_weights", i32 1, i32 2000}  ; first successor weight 1, second successor weight 2000
+!2 = !{!"branch_weights", i32 2000, i32 1}  ; first successor weight 2000, second successor weight 1
+!3 = !{!"branch_weights", i32 100, i32 1}  ; first successor weight 100, second successor weight 1
diff --git a/llvm/test/Transforms/LICM/sink.ll b/llvm/test/Transforms/LICM/sink.ll
new file mode 100644
index 00000000000..0eceb3df79f
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=CHECK-LICM
+; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK
+
+; Original source code:
+; int g;
+; int foo(int p, int x) {
+;   for (int i = 0; i != x; i++)
+;     if (__builtin_expect(i == p, 0)) {
+;       x += g; x *= g;
+;     }
+;   return x;
+; }
+;
+; LICM alone (first RUN line) hoists the load of g to the preheader; running loop-sink afterwards (second RUN line) must move it back into the cold .then block.
+
+@g = global i32 0, align 4
+
+define i32 @foo(i32, i32) #0 {
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %._crit_edge, label %.lr.ph.preheader
+
+.lr.ph.preheader:
+  br label %.lr.ph
+
+; CHECK-LICM: .lr.ph.preheader:
+; CHECK-LICM: load i32, i32* @g
+; CHECK-LICM: br label %.lr.ph
+
+.lr.ph:
+  %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ]
+  %.012 = phi i32 [ %.1, %.combine ], [ %1, %.lr.ph.preheader ]
+  %4 = icmp eq i32 %.03, %0
+  br i1 %4, label %.then, label %.combine, !prof !1  ; !1 = weights 1:2000, so .then is rarely taken
+
+.then:
+  %5 = load i32, i32* @g, align 4  ; loop-invariant load: hoisted by LICM, then sunk back here by loop-sink
+  %6 = add nsw i32 %5, %.012
+  %7 = mul nsw i32 %6, %5
+  br label %.combine
+
+; CHECK-SINK: .then:
+; CHECK-SINK: load i32, i32* @g
+; CHECK-SINK: br label %.combine
+
+.combine:
+  %.1 = phi i32 [ %7, %.then ], [ %.012, %.lr.ph ]
+  %8 = add nuw nsw i32 %.03, 1
+  %9 = icmp eq i32 %8, %.1
+  br i1 %9, label %._crit_edge.loopexit, label %.lr.ph
+
+._crit_edge.loopexit:
+  %.1.lcssa = phi i32 [ %.1, %.combine ]
+  br label %._crit_edge
+
+._crit_edge:
+  %.01.lcssa = phi i32 [ 0, %2 ], [ %.1.lcssa, %._crit_edge.loopexit ]
+  ret i32 %.01.lcssa
+}
+
+!1 = !{!"branch_weights", i32 1, i32 2000}
author	Dehao Chen <dehao@google.com>	2016-10-27 16:30:08 +0000
committer	Dehao Chen <dehao@google.com>	2016-10-27 16:30:08 +0000
commit	b94c09baa058e57b1e931746745050e19785ef30 (patch)
tree	a2d57bc5d38400fcb744b91d3cb49282db424056 /llvm/test
parent	9c9d9cdcf8e996455318bbe334d1d36622338b70 (diff)
download	bcm5719-llvm-b94c09baa058e57b1e931746745050e19785ef30.tar.gz bcm5719-llvm-b94c09baa058e57b1e931746745050e19785ef30.zip