summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp45
-rw-r--r--llvm/test/Transforms/LoopUnroll/revisit.ll6
-rw-r--r--llvm/test/Transforms/LoopUnroll/runtime-loop5.ll7
-rw-r--r--llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll4
-rw-r--r--llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll24
5 files changed, 65 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index a675797af27..3c669ce644e 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -216,6 +216,45 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
}
}
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitabale.
+/// Epilog unroll is more profitable when there is PHI that starts from
+/// constant. In this case epilog will leave PHI start from constant,
+/// but prolog will convert it to non-constant.
+///
+/// loop:
+/// PN = PHI [I, Latch], [CI, PreHeader]
+/// I = foo(PN)
+/// ...
+///
+/// Epilog unroll case.
+/// loop:
+/// PN = PHI [I2, Latch], [CI, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+/// Prolog unroll case.
+/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+/// PN = PHI [I2, Latch], [NewPN, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+///
+static bool isEpilogProfitable(Loop *L) {
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ assert(PreHeader && Header);
+ for (Instruction &BBI : *Header) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ if (!PN)
+ break;
+ if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
+ return true;
+ }
+ return false;
+}
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
@@ -359,9 +398,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"convergent operation.");
});
+ bool EpilogProfitability =
+ UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
+ : isEpilogProfitable(L);
+
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- UnrollRuntimeEpilog, LI, SE, DT,
+ EpilogProfitability, LI, SE, DT,
PreserveLCSSA)) {
if (Force)
RuntimeTripCount = false;
diff --git a/llvm/test/Transforms/LoopUnroll/revisit.ll b/llvm/test/Transforms/LoopUnroll/revisit.ll
index 88c9f7ba21a..fddf6cd1c4e 100644
--- a/llvm/test/Transforms/LoopUnroll/revisit.ll
+++ b/llvm/test/Transforms/LoopUnroll/revisit.ll
@@ -138,11 +138,11 @@ l0.0.latch:
; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0<header>
; CHECK-CHILDREN-NOT: LoopUnrollPass
;
-; Revisit the children of the outer loop that are part of the prologue.
+; Revisit the children of the outer loop that are part of the epilogue.
;
-; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.prol<header>
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.epil<header>
; CHECK-NOT: LoopUnrollPass
-; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.prol<header>
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.epil<header>
; CHECK-NOT: LoopUnrollPass
l0.latch:
br label %l0
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index 6340058411f..86a26baca65 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -14,9 +14,6 @@ entry:
%cmp1 = icmp eq i3 %n, 0
br i1 %cmp1, label %for.end, label %for.body
-; UNROLL-16-NOT: for.body.prol:
-; UNROLL-4: for.body.prol:
-
for.body: ; preds = %for.body, %entry
; UNROLL-16-LABEL: for.body:
; UNROLL-4-LABEL: for.body:
@@ -42,6 +39,10 @@ for.body: ; preds = %for.body, %entry
; UNROLL-16-LABEL: for.end
; UNROLL-4-LABEL: for.end
+
+; UNROLL-16-NOT: for.body.epil:
+; UNROLL-4: for.body.epil:
+
for.end: ; preds = %for.body, %entry
%sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
ret i3 %sum.0.lcssa
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
index f7add40b9d1..6778a52b3af 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
@@ -3,12 +3,12 @@
@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
; CHECK-LABEL: @bar_prof
-; CHECK: loop.prol:
; CHECK: loop:
; CHECK: %mul = mul
; CHECK: %mul.1 = mul
; CHECK: %mul.2 = mul
; CHECK: %mul.3 = mul
+; CHECK: loop.epil:
define i32 @bar_prof(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
entry:
br label %loop
@@ -32,7 +32,7 @@ loop.end:
}
; CHECK-LABEL: @bar_prof_flat
-; CHECK-NOT: loop.prol
+; CHECK-NOT: loop.epil
define i32 @bar_prof_flat(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll
index 2843e627b3c..88f32c92d69 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -171,10 +171,6 @@ for.end: ; preds = %for.body, %entry
; should be duplicated (original and 4x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count4(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body
; CHECK: store
; CHECK: store
@@ -182,6 +178,10 @@ for.end: ; preds = %for.body, %entry
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -287,10 +287,6 @@ for.end: ; preds = %for.body
; (original and 8x).
;
; CHECK-LABEL: @runtime_loop_with_enable(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
@@ -302,6 +298,10 @@ for.end: ; preds = %for.body
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -328,16 +328,16 @@ for.end: ; preds = %for.body, %entry
; should be duplicated (original and 3x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count3(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
OpenPOWER on IntegriCloud