summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Transforms/Scalar/JumpThreading.h2
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp53
-rw-r--r--llvm/test/Transforms/JumpThreading/static-profile.ll119
3 files changed, 173 insertions, 1 deletions
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 13ab78ade46..6fcb2f17b0e 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -134,6 +134,8 @@ private:
const char *Suffix);
void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
BasicBlock *NewBB, BasicBlock *SuccBB);
+ /// Check if the block has profile metadata for its outgoing edges.
+ bool doesBlockHaveProfileData(BasicBlock *BB);
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 080f7d74145..6375f089790 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1620,6 +1620,23 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
return PredBB;
}
+bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
+ const TerminatorInst *TI = BB->getTerminator();
+ assert(TI->getNumSuccessors() > 1 && "not a split");
+
+ MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+ if (!WeightsNode)
+ return false;
+
+ MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
+ if (MDName->getString() != "branch_weights")
+ return false;
+
+ // Ensure there are weights for all of the successors. Note that the first
+ // operand to the metadata node is a name, not a weight.
+ return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
+}
+
/// Update the block frequency of BB and branch weight and the metadata on the
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
@@ -1670,7 +1687,41 @@ void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
- if (BBSuccProbs.size() >= 2) {
+ // Update the profile metadata as well.
+ //
+ // Don't do this if the profile of the transformed blocks was statically
+ // estimated. (This could occur despite the function having an entry
+ // frequency in completely cold parts of the CFG.)
+ //
+ // In this case we don't want to suggest to subsequent passes that the
+ // calculated weights are fully consistent. Consider this graph:
+ //
+ // check_1
+ // 50% / |
+ // eq_1 | 50%
+ // \ |
+ // check_2
+ // 50% / |
+ // eq_2 | 50%
+ // \ |
+ // check_3
+ // 50% / |
+ // eq_3 | 50%
+ // \ |
+ //
+ // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
+ // the overall probabilities are inconsistent; the total probability that the
+ // value is either 1, 2 or 3 is 150%.
+ //
+ // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
+ // becomes 0%. This is even worse if the edge whose probability becomes 0% is
+ // the loop exit edge. Then based solely on static estimation we would assume
+ // the loop was extremely hot.
+ //
+ // FIXME this locally as well so that BPI and BFI are consistent as well. We
+ // shouldn't make edges extremely likely or unlikely based solely on static
+ // estimation.
+ if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
SmallVector<uint32_t, 4> Weights;
for (auto Prob : BBSuccProbs)
Weights.push_back(Prob.getNumerator());
diff --git a/llvm/test/Transforms/JumpThreading/static-profile.ll b/llvm/test/Transforms/JumpThreading/static-profile.ll
new file mode 100644
index 00000000000..d634a607eab
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/static-profile.ll
@@ -0,0 +1,119 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+; Check that based solely on static profile estimation we don't update the
+; branch-weight metadata. Even if the function has an entry frequency, a
+; completely cold part of the CFG may be statically estimated.
+
+; For example in the loop below, jump threading would update the weight of the
+; loop-exiting branch to 0, drastically inflating the frequency of the loop
+; (in the range of billions).
+;
+; This is the CFG of the loop. There is no run-time profile info for edges
+; inside the loop, so branch and block frequencies are estimated as shown:
+;
+; check_1 (16)
+; (8) / |
+; eq_1 | (8)
+; \ |
+; check_2 (16)
+; (8) / |
+; eq_2 | (8)
+; \ |
+; check_3 (16)
+; (1) / |
+; (loop exit) | (15)
+; |
+; (back edge)
+;
+; First we thread eq_1->check_2 to check_3. Frequencies are updated to remove
+; the frequency of eq_1 from check_2 and then the false edge leaving check_2
+; (changed frequencies are highlighted with * *):
+;
+; check_1 (16)
+; (8) / |
+; eq_1~ | (8)
+; / |
+; / check_2 (*8*)
+; / (8) / |
+; \ eq_2 | (*0*)
+; \ \ |
+; ` --- check_3 (16)
+; (1) / |
+; (loop exit) | (15)
+; |
+; (back edge)
+;
+; Next we thread eq_1->check_3 and eq_2->check_3 to check_1 as new back edges.
+; Frequencies are updated to remove the frequency of eq_1 and eq_3 from
+; check_3 and then the false edge leaving check_3 (changed frequencies are
+; highlighted with * *):
+;
+; check_1 (16)
+; (8) / |
+; eq_1~ | (8)
+; / |
+; / check_2 (*8*)
+; / (8) / |
+; /-- eq_2~ | (*0*)
+; (back edge) |
+; check_3 (*0*)
+; (*0*) / |
+; (loop exit) | (*0*)
+; |
+; (back edge)
+;
+; As a result, the loop exit edge ends up with 0 frequency which in turn makes
+; the loop header to have maximum frequency.
+
+declare void @bar()
+
+define void @foo(i32 *%p, i32 %n) !prof !0 {
+entry:
+ %enter_loop = icmp eq i32 %n, 0
+ br i1 %enter_loop, label %exit, label %check_1, !prof !1
+; CHECK: br i1 %enter_loop, label %exit, label %check_1, !prof !1
+
+check_1:
+ %v = load i32, i32* %p
+ %cond1 = icmp eq i32 %v, 1
+ br i1 %cond1, label %eq_1, label %check_2
+; No metadata:
+; CHECK: br i1 %cond1, label %check_2.thread, label %check_2{{$}}
+
+eq_1:
+ call void @bar()
+ br label %check_2
+; Verify the new backedge:
+; CHECK: check_2.thread:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label %check_1
+
+check_2:
+ %cond2 = icmp eq i32 %v, 2
+ br i1 %cond2, label %eq_2, label %check_3
+; No metadata:
+; CHECK: br i1 %cond2, label %eq_2, label %check_3{{$}}
+
+eq_2:
+ call void @bar()
+ br label %check_3
+; Verify the new backedge:
+; CHECK: eq_2:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label %check_1
+
+check_3:
+ %condE = icmp eq i32 %v, 3
+ br i1 %condE, label %exit, label %check_1
+; No metadata:
+; CHECK: br i1 %condE, label %exit, label %check_1{{$}}
+
+exit:
+ ret void
+}
+
+!0 = !{!"function_entry_count", i64 120}
+; CHECK-NOT: branch_weights
+!1 = !{!"branch_weights", i32 119, i32 1}
+; CHECK: !1 = !{!"branch_weights", i32 119, i32 1}
+; CHECK-NOT: branch_weights
OpenPOWER on IntegriCloud