summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author Xin Tong <trent.xin.tong@gmail.com> 2017-01-02 20:27:23 +0000
committer Xin Tong <trent.xin.tong@gmail.com> 2017-01-02 20:27:23 +0000
commit 2940231ff0c257cbeb983a8a6dbcd4ed6b1bfcbb (patch)
tree 9cef24a92dd4b37437cb0b6236f451800fac2cad /llvm
parent f42339015643d7809a8b81afb8685fb19e069c0f (diff)
download bcm5719-llvm-2940231ff0c257cbeb983a8a6dbcd4ed6b1bfcbb.tar.gz
bcm5719-llvm-2940231ff0c257cbeb983a8a6dbcd4ed6b1bfcbb.zip
Make sure total loop body weight is preserved in loop peeling
Summary: Regardless of how the loop body weight is distributed, we should preserve the total loop body weight, i.e. the same weight should reach the body of the loop or its duplicates in both the peeled and the unpeeled case. Reviewers: mkuper, davidxl, anemet Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D28179 llvm-svn: 290833
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp 25
-rw-r--r-- llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll 2
2 files changed, 18 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index dc526a20c90..842cf31f2e3 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
uint64_t TrueWeight, FalseWeight;
- uint64_t ExitWeight = 0, BackEdgeWeight = 0;
+ uint64_t ExitWeight = 0, CurHeaderWeight = 0;
if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight;
+ // The # of times the loop body executes is the sum of the exit block
+ // weight and the # of times the backedges are taken.
+ CurHeaderWeight = TrueWeight + FalseWeight;
}
// For each peeled-off iteration, make a copy of the loop.
@@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
- // The exit weight of the previous iteration is the header entry weight
- // of the current iteration. So this is exactly how many dynamic iterations
- // the current peeled-off static iteration uses up.
+ // Subtract the exit weight from the current header weight -- the exit
+ // weight is exactly the weight of the previous iteration's header.
// FIXME: due to the way the distribution is constructed, we need a
// guard here to make sure we don't end up with non-positive weights.
- if (ExitWeight < BackEdgeWeight)
- BackEdgeWeight -= ExitWeight;
+ if (ExitWeight < CurHeaderWeight)
+ CurHeaderWeight -= ExitWeight;
else
- BackEdgeWeight = 1;
+ CurHeaderWeight = 1;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
NewBlocks, LoopBlocks, VMap, LVMap, LI);
@@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// Adjust the branch weights on the loop exit.
if (ExitWeight) {
+ // The backedge count is the difference of current header weight and
+ // current loop exit weight. If the current header weight is smaller than
+ // the current loop exit weight, we mark the loop backedge weight as 1.
+ uint64_t BackEdgeWeight = 0;
+ if (ExitWeight < CurHeaderWeight)
+ BackEdgeWeight = CurHeaderWeight - ExitWeight;
+ else
+ BackEdgeWeight = 1;
MDBuilder MDB(LatchBR->getContext());
MDNode *WeightNode =
HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll
index 2987b03c04d..18309b0691f 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll
@@ -43,5 +43,5 @@ for.end: ; preds = %for.cond.for.end_cr
;CHECK: !1 = !{!"branch_weights", i32 900, i32 101}
;CHECK: !2 = !{!"branch_weights", i32 540, i32 360}
;CHECK: !3 = !{!"branch_weights", i32 162, i32 378}
-;CHECK: !4 = !{!"branch_weights", i32 560, i32 162}
+;CHECK: !4 = !{!"branch_weights", i32 1399, i32 162}
OpenPOWER on IntegriCloud