summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h44
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp13
-rw-r--r--llvm/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll65
-rw-r--r--llvm/test/Transforms/PGOProfile/irreducible.ll2
4 files changed, 112 insertions, 12 deletions
diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 7b916e3653b..91056797faa 100644
--- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -16,6 +16,7 @@
#define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -1155,35 +1156,56 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
DEBUG(dbgs() << "isIrreducible = true\n");
Distribution Dist;
unsigned NumHeadersWithWeight = 0;
+ Optional<uint64_t> MinHeaderWeight;
+ DenseSet<uint32_t> HeadersWithoutWeight;
+ HeadersWithoutWeight.reserve(Loop.NumHeaders);
for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
auto &HeaderNode = Loop.Nodes[H];
const BlockT *Block = getBlock(HeaderNode);
IsIrrLoopHeader.set(Loop.Nodes[H].Index);
Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight();
- if (!HeaderWeight)
+ if (!HeaderWeight) {
+ DEBUG(dbgs() << "Missing irr loop header metadata on "
+ << getBlockName(HeaderNode) << "\n");
+ HeadersWithoutWeight.insert(H);
continue;
+ }
DEBUG(dbgs() << getBlockName(HeaderNode)
<< " has irr loop header weight " << HeaderWeight.getValue()
<< "\n");
NumHeadersWithWeight++;
uint64_t HeaderWeightValue = HeaderWeight.getValue();
- if (HeaderWeightValue)
+ if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
+ MinHeaderWeight = HeaderWeightValue;
+ if (HeaderWeightValue) {
Dist.addLocal(HeaderNode, HeaderWeightValue);
- }
- if (NumHeadersWithWeight != Loop.NumHeaders) {
- // Not all headers have a weight metadata. Distribute weight evenly.
- Dist = Distribution();
- for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
- auto &HeaderNode = Loop.Nodes[H];
- Dist.addLocal(HeaderNode, 1);
}
}
+ // As a heuristic, if some headers don't have a weight, give them the
+ // minimum weight seen (not to disrupt the existing trends too much by
+ // using a weight that's in the general range of the other headers' weights,
+ // and the minimum seems to perform better than the average.)
+ // FIXME: better update in the passes that drop the header weight.
+ // If no headers have a weight, give them even weight (use weight 1).
+ if (!MinHeaderWeight)
+ MinHeaderWeight = 1;
+ for (uint32_t H : HeadersWithoutWeight) {
+ auto &HeaderNode = Loop.Nodes[H];
+ const BlockT *Block = getBlock(HeaderNode);
+ assert(!Block->getIrrLoopHeaderWeight() &&
+ "Shouldn't have a weight metadata");
+ uint64_t MinWeight = MinHeaderWeight.getValue();
+ DEBUG(dbgs() << "Giving weight " << MinWeight
+ << " to " << getBlockName(HeaderNode) << "\n");
+ if (MinWeight)
+ Dist.addLocal(HeaderNode, MinWeight);
+ }
distributeIrrLoopHeaderMass(Dist);
for (const BlockNode &M : Loop.Nodes)
if (!propagateMassToSuccessors(&Loop, M))
llvm_unreachable("unhandled irreducible control flow");
- if (NumHeadersWithWeight != Loop.NumHeaders)
- // Not all headers have a weight metadata. Adjust header mass.
+ if (NumHeadersWithWeight == 0)
+ // No headers have weight metadata. Adjust header mass.
adjustLoopHeaderMass(Loop);
} else {
Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c92d48396c8..47278e19283 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1188,11 +1188,22 @@ void PGOUseFunc::setBranchWeights() {
}
}
+static bool isIndirectBrTarget(BasicBlock *BB) { // True iff some predecessor of BB ends in an indirectbr.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) // This predecessor's terminator is an indirectbr.
+ return true;
+ }
+ return false; // No predecessor terminates in an indirectbr.
+}
+
void PGOUseFunc::annotateIrrLoopHeaderWeights() {
DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
// Find irr loop headers
for (auto &BB : F) {
- if (BFI->isIrrLoopHeader(&BB)) {
+ // As a heuristic also annotate indirectbr targets as they have a high chance
+ // to become an irreducible loop header after the indirectbr tail
+ // duplication.
+ if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
TerminatorInst *TI = BB.getTerminator();
const UseBBInfo &BBCountInfo = getBBInfo(&BB);
setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
diff --git a/llvm/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll b/llvm/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll
index 3eb0597a957..8a18cbaf896 100644
--- a/llvm/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll
+++ b/llvm/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll
@@ -159,3 +159,68 @@ indirectgoto: ; preds = %if.then18, %if.then
; CHECK-NEXT: - sw.default: {{.*}} count = 0
; CHECK-NEXT: - exit: {{.*}} count = 1
; CHECK-NEXT: - indirectgoto: {{.*}} count = 399, irr_loop_header_weight = 400
+
+; Missing some irr loop annotations.
+; Function Attrs: noinline norecurse nounwind uwtable
+define i32 @_Z11irreduciblePh2(i8* nocapture readonly %p) !prof !27 {
+entry:
+ %0 = load i32, i32* @tracing, align 4
+ %1 = trunc i32 %0 to i8
+ %tobool = icmp eq i32 %0, 0
+ br label %for.cond1
+
+for.cond1: ; preds = %sw.default, %entry
+ br label %dispatch_op
+
+dispatch_op: ; preds = %sw.bb6, %for.cond1
+switch i8 %1, label %sw.default [
+ i8 0, label %sw.bb
+ i8 1, label %dispatch_op.sw.bb6_crit_edge
+ i8 2, label %sw.bb15
+ ], !prof !36
+
+dispatch_op.sw.bb6_crit_edge: ; preds = %dispatch_op
+ br label %sw.bb6
+
+sw.bb: ; preds = %indirectgoto, %dispatch_op
+ br label %exit
+
+TARGET_1: ; preds = %indirectgoto
+ br label %sw.bb6
+
+sw.bb6: ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge
+ br i1 %tobool, label %dispatch_op, label %if.then, !prof !37 ; Missing !irr_loop !38
+
+if.then: ; preds = %sw.bb6
+ br label %indirectgoto
+
+TARGET_2: ; preds = %indirectgoto
+ br label %sw.bb15
+
+sw.bb15: ; preds = %TARGET_2, %dispatch_op
+ br i1 %tobool, label %if.then18, label %exit, !prof !39, !irr_loop !40
+
+if.then18: ; preds = %sw.bb15
+ br label %indirectgoto
+
+unknown_op: ; preds = %indirectgoto
+ br label %sw.default
+
+sw.default: ; preds = %unknown_op, %dispatch_op
+ br label %for.cond1
+
+exit: ; preds = %sw.bb15, %sw.bb
+ ret i32 0
+
+indirectgoto: ; preds = %if.then18, %if.then
+ %idxprom21 = zext i32 %0 to i64
+ %arrayidx22 = getelementptr inbounds [256 x i8*], [256 x i8*]* @targets, i64 0, i64 %idxprom21
+ %target = load i8*, i8** %arrayidx22, align 8
+ indirectbr i8* %target, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2], !prof !41, !irr_loop !42
+}
+
+; CHECK-LABEL: Printing analysis {{.*}} for function '_Z11irreduciblePh2':
+; CHECK: block-frequency-info: _Z11irreduciblePh2
+; CHECK: - sw.bb6: {{.*}} count = 100
+; CHECK: - sw.bb15: {{.*}} count = 100, irr_loop_header_weight = 100
+; CHECK: - indirectgoto: {{.*}} count = 400, irr_loop_header_weight = 400
diff --git a/llvm/test/Transforms/PGOProfile/irreducible.ll b/llvm/test/Transforms/PGOProfile/irreducible.ll
index 9b2c8f638ed..9394b724f7e 100644
--- a/llvm/test/Transforms/PGOProfile/irreducible.ll
+++ b/llvm/test/Transforms/PGOProfile/irreducible.ll
@@ -91,6 +91,7 @@ sw.bb: ; preds = %indirectgoto, %disp
TARGET_1: ; preds = %indirectgoto
br label %sw.bb6
+; USE: br label %sw.bb6, !irr_loop {{.*}}
sw.bb6: ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge
br i1 %tobool, label %dispatch_op, label %if.then
@@ -102,6 +103,7 @@ if.then: ; preds = %sw.bb6
TARGET_2: ; preds = %indirectgoto
br label %sw.bb15
+; USE: br label %sw.bb15, !irr_loop {{.*}}
sw.bb15: ; preds = %TARGET_2, %dispatch_op
br i1 %tobool, label %if.then18, label %exit
OpenPOWER on IntegriCloud