summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/IR/ModuleSummaryIndex.h9
-rw-r--r--llvm/lib/Analysis/ModuleSummaryAnalysis.cpp34
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp69
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp5
-rw-r--r--llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll7
-rw-r--r--llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll32
6 files changed, 143 insertions, 13 deletions
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 6b866dbd9e9..45d9bf7af70 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -80,6 +80,7 @@ struct ValueInfo {
assert(Kind == VI_Value && "Not a Value type");
return TheValue.V;
}
+ bool isGUID() const { return Kind == VI_GUID; }
};
/// \brief Function and variable summary information to aid decisions and
@@ -261,6 +262,14 @@ public:
CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info));
}
+ /// Record a call graph edge from this function to each function GUID recorded
+ /// in \p CallGraphEdges.
+ void
+ addCallGraphEdges(DenseMap<GlobalValue::GUID, CalleeInfo> &CallGraphEdges) {
+ for (auto &EI : CallGraphEdges)
+ addCallGraphEdge(EI.first, EI.second);
+ }
+
/// Record a call graph edge from this function to the function identified
/// by \p CalleeV, with \p CalleeInfo including the cumulative profile
/// count (across all calls from this function) or 0 if no PGO.
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index ff41581e599..c9ac2bdb794 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@@ -73,7 +74,9 @@ void ModuleSummaryIndexBuilder::computeFunctionSummary(
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
DenseMap<const Value *, CalleeInfo> CallGraphEdges;
+ DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges;
DenseSet<const Value *> RefEdges;
+ ICallPromotionAnalysis ICallAnalysis;
SmallPtrSet<const User *, 8> Visited;
for (const BasicBlock &BB : F)
@@ -83,13 +86,29 @@ void ModuleSummaryIndexBuilder::computeFunctionSummary(
if (auto CS = ImmutableCallSite(&I)) {
auto *CalledFunction = CS.getCalledFunction();
- if (CalledFunction && CalledFunction->hasName() &&
- !CalledFunction->isIntrinsic()) {
- auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
- auto *CalleeId =
- M->getValueSymbolTable().lookup(CalledFunction->getName());
- CallGraphEdges[CalleeId] +=
- (ScaledCount ? ScaledCount.getValue() : 0);
+ // Check if this is a direct call to a known function.
+ if (CalledFunction) {
+ if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) {
+ auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
+ auto *CalleeId =
+ M->getValueSymbolTable().lookup(CalledFunction->getName());
+ CallGraphEdges[CalleeId] +=
+ (ScaledCount ? ScaledCount.getValue() : 0);
+ }
+ } else {
+ // Otherwise, check for an indirect call (call to a non-const value
+ // that isn't an inline assembly call).
+ const CallInst *CI = dyn_cast<CallInst>(&I);
+ if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) &&
+ !(CI && CI->isInlineAsm())) {
+ uint32_t NumVals, NumCandidates;
+ uint64_t TotalCount;
+ auto CandidateProfileData =
+ ICallAnalysis.getPromotionCandidatesForInstruction(
+ &I, NumVals, TotalCount, NumCandidates);
+ for (auto &Candidate : CandidateProfileData)
+ IndirectCallEdges[Candidate.Value] += Candidate.Count;
+ }
}
}
findRefEdges(&I, RefEdges, Visited);
@@ -99,6 +118,7 @@ void ModuleSummaryIndexBuilder::computeFunctionSummary(
std::unique_ptr<FunctionSummary> FuncSummary =
llvm::make_unique<FunctionSummary>(Flags, NumInsts);
FuncSummary->addCallGraphEdges(CallGraphEdges);
+ FuncSummary->addCallGraphEdges(IndirectCallEdges);
FuncSummary->addRefEdges(RefEdges);
Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 1ee7b7b25e4..dcb8b58cd7b 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -119,6 +119,14 @@ class ModuleBitcodeWriter : public BitcodeWriter {
/// The start bit of the module block, for use in generating a module hash
uint64_t BitcodeStartBit = 0;
+ /// Map that holds the correspondence between GUIDs in the summary index,
+ /// that came from indirect call profiles, and a value id generated by this
+ /// class to use in the VST and summary block records.
+ std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
+
+ /// Tracks the last value id recorded in the GUIDToValueMap.
+ unsigned GlobalValueId;
+
public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
@@ -132,6 +140,25 @@ public:
// will start at the bitcode, and we need the offset of the VST
// to line up.
BitcodeStartBit = Stream.GetCurrentBitNo();
+
+ // Assign ValueIds to any callee values in the index that came from
+ // indirect call profiles and were recorded as a GUID not a Value*
+ // (which would have been assigned an ID by the ValueEnumerator).
+ // The starting ValueId is just after the number of values in the
+ // ValueEnumerator, so that they can be emitted in the VST.
+ GlobalValueId = VE.getValues().size();
+ if (Index)
+ for (const auto &GUIDSummaryLists : *Index)
+ // Examine all summaries for this GUID.
+ for (auto &Summary : GUIDSummaryLists.second)
+ if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
+ // For each call in the function summary, see if the call
+ // is to a GUID (which means it is for an indirect call,
+ // otherwise we would have a Value for it). If so, synthesize
+ // a value id.
+ for (auto &CallEdge : FS->calls())
+ if (CallEdge.first.isGUID())
+ assignValueId(CallEdge.first.getGUID());
}
private:
@@ -260,6 +287,22 @@ private:
unsigned FSModRefsAbbrev);
void writePerModuleGlobalValueSummary();
void writeModuleHash(size_t BlockStartPos);
+
+ void assignValueId(GlobalValue::GUID ValGUID) {
+ GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
+ }
+ unsigned getValueId(GlobalValue::GUID ValGUID) {
+ const auto &VMI = GUIDToValueIdMap.find(ValGUID);
+ assert(VMI != GUIDToValueIdMap.end());
+ return VMI->second;
+ }
+ // Helper to get the valueId for the type of value recorded in VI.
+ unsigned getValueId(ValueInfo VI) {
+ if (VI.isGUID())
+ return getValueId(VI.getGUID());
+ return VE.getValueID(VI.getValue());
+ }
+ std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
};
/// Class to manage the bitcode writing for a combined index.
@@ -2707,6 +2750,7 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
unsigned FnEntry8BitAbbrev;
unsigned FnEntry7BitAbbrev;
unsigned FnEntry6BitAbbrev;
+ unsigned GUIDEntryAbbrev;
if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
// 8-bit fixed-width VST_CODE_FNENTRY function strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -2734,11 +2778,19 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // FIXME: Change the name of this record as it is now used by
+ // the per-module index as well.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
+ GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv);
}
// FIXME: Set up the abbrev, we know how many values there are!
// FIXME: We know if the type names can use 7-bit ascii.
- SmallVector<unsigned, 64> NameVals;
+ SmallVector<uint64_t, 64> NameVals;
for (const ValueName &Name : VST) {
// Figure out the encoding to use for the name.
@@ -2799,6 +2851,16 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
Stream.EmitRecord(Code, NameVals, AbbrevToUse);
NameVals.clear();
}
+ // Emit any GUID valueIDs created for indirect call edges into the
+ // module-level VST.
+ if (IsModuleLevel && hasVSTOffsetPlaceholder())
+ for (const auto &GI : valueIds()) {
+ NameVals.push_back(GI.second);
+ NameVals.push_back(GI.first);
+ Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals,
+ GUIDEntryAbbrev);
+ NameVals.clear();
+ }
Stream.ExitBlock();
}
@@ -3220,12 +3282,11 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
std::sort(Calls.begin(), Calls.end(),
[this](const FunctionSummary::EdgeTy &L,
const FunctionSummary::EdgeTy &R) {
- return VE.getValueID(L.first.getValue()) <
- VE.getValueID(R.first.getValue());
+ return getValueId(L.first) < getValueId(R.first);
});
bool HasProfileData = F.getEntryCount().hasValue();
for (auto &ECI : Calls) {
- NameVals.push_back(VE.getValueID(ECI.first.getValue()));
+ NameVals.push_back(getValueId(ECI.first));
assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite");
NameVals.push_back(ECI.second.CallsiteCount);
if (HasProfileData)
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 537919d76f7..cf5b76dc365 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -407,10 +407,11 @@ void PassManagerBuilder::populateModulePassManager(
/// PGO instrumentation is added during the compile phase for ThinLTO, do
/// not run it a second time
addPGOInstrPasses(MPM);
- // Indirect call promotion that promotes intra-module targets only.
- MPM.add(createPGOIndirectCallPromotionLegacyPass());
}
+ // Indirect call promotion that promotes intra-module targets only.
+ MPM.add(createPGOIndirectCallPromotionLegacyPass());
+
if (EnableNonLTOGlobalsModRef)
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll
new file mode 100644
index 00000000000..c77eb5172d8
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll
@@ -0,0 +1,7 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @a() {
+entry:
+ ret void
+}
diff --git a/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll b/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll
new file mode 100644
index 00000000000..49dc7fa39e0
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll
@@ -0,0 +1,32 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
+; IMPORTS: Import a
+
+; RUN: opt %t4.bc -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt %t4.bc -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=1 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
+; PASS-REMARK: Promote indirect call to a with count 1 out of 1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = external local_unnamed_addr global void ()*, align 8
+
+define i32 @main() local_unnamed_addr {
+entry:
+ %0 = load void ()*, void ()** @foo, align 8
+; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
+ tail call void %0(), !prof !1
+ ret i32 0
+}
+
+!1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1}
+
+; Should not have a VP annotation on new indirect call (check before and after
+; branch_weights annotation).
+; ICALL-PROM-NOT: !"VP"
+; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
+; ICALL-PROM-NOT: !"VP"
OpenPOWER on IntegriCloud