summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp53
1 files changed, 52 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 080f7d74145..6375f089790 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1620,6 +1620,23 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
return PredBB;
}
+bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
+ const TerminatorInst *TI = BB->getTerminator();
+ assert(TI->getNumSuccessors() > 1 && "not a split");
+
+ MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+ if (!WeightsNode)
+ return false;
+
+ MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
+ if (MDName->getString() != "branch_weights")
+ return false;
+
+ // Ensure there are weights for all of the successors. Note that the first
+ // operand to the metadata node is a name, not a weight.
+ return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
+}
+
/// Update the block frequency of BB and branch weight and the metadata on the
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
@@ -1670,7 +1687,41 @@ void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
- if (BBSuccProbs.size() >= 2) {
+ // Update the profile metadata as well.
+ //
+ // Don't do this if the profile of the transformed blocks was statically
+ // estimated. (This could occur despite the function having an entry
+ // frequency in completely cold parts of the CFG.)
+ //
+ // In this case we don't want to suggest to subsequent passes that the
+ // calculated weights are fully consistent. Consider this graph:
+ //
+ // check_1
+ // 50% / |
+ // eq_1 | 50%
+ // \ |
+ // check_2
+ // 50% / |
+ // eq_2 | 50%
+ // \ |
+ // check_3
+ // 50% / |
+ // eq_3 | 50%
+ // \ |
+ //
+ // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
+ // the overall probabilities are inconsistent; the total probability that the
+ // value is either 1, 2 or 3 is 150%.
+ //
+ // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
+ // becomes 0%. This is even worse if the edge whose probability becomes 0% is
+ // the loop exit edge. Then based solely on static estimation we would assume
+ // the loop was extremely hot.
+ //
+ // FIXME this locally as well so that BPI and BFI are consistent as well. We
+ // shouldn't make edges extremely likely or unlikely based solely on static
+ // estimation.
+ if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
SmallVector<uint32_t, 4> Weights;
for (auto Prob : BBSuccProbs)
Weights.push_back(Prob.getNumerator());
OpenPOWER on IntegriCloud