diff options
author | Rong Xu <xur@google.com> | 2017-04-04 16:42:20 +0000 |
---|---|---|
committer | Rong Xu <xur@google.com> | 2017-04-04 16:42:20 +0000 |
commit | 48596b6f7a422650987c7f2c2566917c7dfff61c (patch) | |
tree | 64be6663f30ef95bfcbab1f9d22f64184f0f2188 /llvm/lib | |
parent | e7cdcdea730e5edcaacb86e7feb8accbd0810d88 (diff) | |
download | bcm5719-llvm-48596b6f7a422650987c7f2c2566917c7dfff61c.tar.gz bcm5719-llvm-48596b6f7a422650987c7f2c2566917c7dfff61c.zip |
[PGO] Memory intrinsic calls optimization based on profiled size
This patch optimizes two memory intrinsic operations: memset and memcpy based
on the profiled size of the operation. The high level transformation is like:
mem_op(..., size)
==>
switch (size) {
case s1:
mem_op(..., s1);
goto merge_bb;
case s2:
mem_op(..., s2);
goto merge_bb;
...
default:
mem_op(..., size);
goto merge_bb;
}
merge_bb:
Differential Revision: http://reviews.llvm.org/D28966
llvm-svn: 299446
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Passes/PassRegistry.def | 1 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProf.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 346 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 63 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/Instrumentation.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 30 |
7 files changed, 411 insertions, 55 deletions
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 9234677ec00..efd4c097a67 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -174,6 +174,7 @@ FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) +FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs())) diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index bbefde709cf..0ec3fce4b23 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -936,4 +936,25 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { return true; } +// Parse the value profile options. +void getMemOPSizeRangeFromOption(std::string MemOPSizeRange, + int64_t &RangeStart, int64_t &RangeLast) { + static const int64_t DefaultMemOPSizeRangeStart = 0; + static const int64_t DefaultMemOPSizeRangeLast = 8; + RangeStart = DefaultMemOPSizeRangeStart; + RangeLast = DefaultMemOPSizeRangeLast; + + if (!MemOPSizeRange.empty()) { + auto Pos = MemOPSizeRange.find(":"); + if (Pos != std::string::npos) { + if (Pos > 0) + RangeStart = atoi(MemOPSizeRange.substr(0, Pos).c_str()); + if (Pos < MemOPSizeRange.size() - 1) + RangeLast = atoi(MemOPSizeRange.substr(Pos + 1).c_str()); + } else + RangeLast = atoi(MemOPSizeRange.c_str()); + } + assert(RangeLast >= RangeStart); +} + } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index b9336099c7a..6e1edbb21c7 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -302,6 +302,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLibCallsShrinkWrapPass()); addExtensionsToPM(EP_Peephole, MPM); + // Optimize memory intrinsic calls based on the profiled size information. + if (SizeLevel == 0) + MPM.add(createPGOMemOPSizeOptLegacyPass()); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 700ed4e8576..3dfafee146e 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -1,4 +1,4 @@ -//===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===// +//===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/IR/BasicBlock.h" @@ -40,6 +41,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -53,6 +55,8 @@ using namespace llvm; STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); +STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); +STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); // Command line option to disable indirect-call promotion with the default as // false. This is for debug purpose. @@ -106,6 +110,44 @@ static cl::opt<bool> ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); +// The minimum call count to optimize memory intrinsic calls. +static cl::opt<unsigned> + MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, + cl::init(1000), + cl::desc("The minimum count to optimize memory " + "intrinsic calls")); + +// Command line option to disable memory intrinsic optimization. The default is +// false. This is for debug purpose. +static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false), + cl::Hidden, cl::desc("Disable optimize")); + +// The percent threshold to optimize memory intrinsic calls. +static cl::opt<unsigned> + MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), + cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold for the " + "memory intrinsic calls optimization")); + +// Maximum number of versions for optimizing memory intrinsic call. +static cl::opt<unsigned> + MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, + cl::ZeroOrMore, + cl::desc("The max version for the optimized memory " + " intrinsic calls")); + +// Scale the counts from the annotation using the BB count value. +static cl::opt<bool> + MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, + cl::desc("Scale the memop size counts using the basic " + " block count value")); + +// This option sets the rangge of precise profile memop sizes. +extern cl::opt<std::string> MemOPSizeRange; + +// This option sets the value that groups large memop sizes +extern cl::opt<unsigned> MemOPSizeLarge; + namespace { class PGOIndirectCallPromotionLegacyPass : public ModulePass { public: @@ -130,6 +172,23 @@ private: // the promoted direct call. bool SamplePGO; }; + +class PGOMemOPSizeOptLegacyPass : public FunctionPass { +public: + static char ID; + + PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { + initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "PGOMemOPSize"; } + +private: + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + } +}; } // end anonymous namespace char PGOIndirectCallPromotionLegacyPass::ID = 0; @@ -143,6 +202,19 @@ ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); } +char PGOMemOPSizeOptLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) + +FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { + return new PGOMemOPSizeOptLegacyPass(); +} + namespace { // The class for main data structure to promote indirect calls to conditional // direct calls. @@ -675,3 +747,275 @@ PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, return PreservedAnalyses::none(); } + +namespace { +class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { +public: + MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) + : Func(Func), BFI(BFI), Changed(false) { + ValueDataArray = + llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); + // Get the MemOPSize range information from option MemOPSizeRange, + getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, + PreciseRangeLast); + } + bool isChanged() const { return Changed; } + void perform() { + WorkList.clear(); + visit(Func); + + for (auto &MI : WorkList) { + ++NumOfPGOMemOPAnnotate; + if (perform(MI)) { + Changed = true; + ++NumOfPGOMemOPOpt; + DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName() + << "is Transformed.\n"); + } + } + } + + void visitMemIntrinsic(MemIntrinsic &MI) { + Value *Length = MI.getLength(); + // Not perform on constant length calls. + if (dyn_cast<ConstantInt>(Length)) + return; + WorkList.push_back(&MI); + } + +private: + Function &Func; + BlockFrequencyInfo &BFI; + bool Changed; + std::vector<MemIntrinsic *> WorkList; + // Start of the previse range. + int64_t PreciseRangeStart; + // Last value of the previse range. + int64_t PreciseRangeLast; + // The space to read the profile annotation. + std::unique_ptr<InstrProfValueData[]> ValueDataArray; + bool perform(MemIntrinsic *MI); + + // This kind shows which group the value falls in. For PreciseValue, we have + // the profile count for that value. LargeGroup groups the values that are in + // range [LargeValue, +inf). NonLargeGroup groups the rest of values. + enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup }; + + MemOPSizeKind getMemOPSizeKind(int64_t Value) const { + if (Value == MemOPSizeLarge && MemOPSizeLarge != 0) + return LargeGroup; + if (Value == PreciseRangeLast + 1) + return NonLargeGroup; + return PreciseValue; + } +}; + +static const char *getMIName(const MemIntrinsic *MI) { + switch (MI->getIntrinsicID()) { + case Intrinsic::memcpy: + return "memcpy"; + case Intrinsic::memmove: + return "memmove"; + case Intrinsic::memset: + return "memset"; + default: + return "unknown"; + } +} + +static bool isProfitable(uint64_t Count, uint64_t TotalCount) { + assert(Count <= TotalCount); + if (Count < MemOPCountThreshold) + return false; + if (Count < TotalCount * MemOPPercentThreshold / 100) + return false; + return true; +} + +static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, + uint64_t Denom) { + if (!MemOPScaleCount) + return Count; + bool Overflowed; + uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); + return ScaleCount / Denom; +} + +bool MemOPSizeOpt::perform(MemIntrinsic *MI) { + assert(MI); + if (MI->getIntrinsicID() == Intrinsic::memmove) + return false; + + uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; + uint64_t TotalCount; + if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, + ValueDataArray.get(), NumVals, TotalCount)) + return false; + + uint64_t ActualCount = TotalCount; + uint64_t SavedTotalCount = TotalCount; + if (MemOPScaleCount) { + auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); + if (!BBEdgeCount) + return false; + ActualCount = *BBEdgeCount; + } + + if (ActualCount < MemOPCountThreshold) + return false; + + ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); + TotalCount = ActualCount; + if (MemOPScaleCount) + DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount + << " denominator = " << SavedTotalCount << "\n"); + + // Keeping track of the count of the default case: + uint64_t RemainCount = TotalCount; + SmallVector<uint64_t, 16> SizeIds; + SmallVector<uint64_t, 16> CaseCounts; + uint64_t MaxCount = 0; + unsigned Version = 0; + // Default case is in the front -- save the slot here. + CaseCounts.push_back(0); + for (auto &VD : VDs) { + int64_t V = VD.Value; + uint64_t C = VD.Count; + if (MemOPScaleCount) + C = getScaledCount(C, ActualCount, SavedTotalCount); + + // Only care precise value here. + if (getMemOPSizeKind(V) != PreciseValue) + continue; + + // ValueCounts are sorted on the count. Break at the first un-profitable + // value. + if (!isProfitable(C, RemainCount)) + break; + + SizeIds.push_back(V); + CaseCounts.push_back(C); + if (C > MaxCount) + MaxCount = C; + + assert(RemainCount >= C); + RemainCount -= C; + + if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) + break; + } + + if (Version == 0) + return false; + + CaseCounts[0] = RemainCount; + if (RemainCount > MaxCount) + MaxCount = RemainCount; + + uint64_t SumForOpt = TotalCount - RemainCount; + DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs " + << TotalCount << "\n"); + DEBUG( + for (auto &VD + : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); + + DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version + << " Versions\n"); + + // mem_op(..., size) + // ==> + // switch (size) { + // case s1: + // mem_op(..., s1); + // goto merge_bb; + // case s2: + // mem_op(..., s2); + // goto merge_bb; + // ... + // default: + // mem_op(..., size); + // goto merge_bb; + // } + // merge_bb: + + BasicBlock *BB = MI->getParent(); + DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); + DEBUG(dbgs() << *BB << "\n"); + + BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock::iterator It(*MI); + ++It; + assert(It != DefaultBB->end()); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + DefaultBB->setName("MemOP.Default"); + MergeBB->setName("MemOP.Merge"); + + auto &Ctx = Func.getContext(); + IRBuilder<> IRB(BB); + BB->getTerminator()->eraseFromParent(); + Value *SizeVar = MI->getLength(); + SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); + + // Clear the value profile data. + MI->setMetadata(LLVMContext::MD_prof, nullptr); + + DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + + for (uint64_t SizeId : SizeIds) { + ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId); + BasicBlock *CaseBB = BasicBlock::Create( + Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); + Instruction *NewInst = MI->clone(); + // Fix the argument. + dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId); + CaseBB->getInstList().push_back(NewInst); + IRBuilder<> IRBCase(CaseBB); + IRBCase.CreateBr(MergeBB); + SI->addCase(CaseSizeId, CaseBB); + DEBUG(dbgs() << *CaseBB << "\n"); + } + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); + + DEBUG(dbgs() << *BB << "\n"); + DEBUG(dbgs() << *DefaultBB << "\n"); + DEBUG(dbgs() << *MergeBB << "\n"); + + emitOptimizationRemark(Func.getContext(), "memop-opt", Func, + MI->getDebugLoc(), + Twine("optimize ") + getMIName(MI) + " with count " + + Twine(SumForOpt) + " out of " + Twine(TotalCount) + + " for " + Twine(Version) + " versions"); + + return true; +} +} // namespace + +static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { + if (DisableMemOPOPT) + return false; + + if (F.hasFnAttribute(Attribute::OptimizeForSize)) + return false; + MemOPSizeOpt MemOPSizeOpt(F, BFI); + MemOPSizeOpt.perform(); + return MemOPSizeOpt.isChanged(); +} + +bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { + BlockFrequencyInfo &BFI = + getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); + return PGOMemOPSizeOptImpl(F, BFI); +} + +namespace llvm { +char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; + +PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI); + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} +} // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index f2c19328ff1..1f8bcb9a330 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -51,6 +51,21 @@ using namespace llvm; #define DEBUG_TYPE "instrprof" +// The start and end values of precise value profile range for memory +// intrinsic sizes +cl::opt<std::string> MemOPSizeRange( + "memop-size-range", + cl::desc("Set the range of size in memory intrinsic calls to be profiled " + "precisely, in a format of <start_val>:<end_val>"), + cl::init("")); + +// The value that considered to be large value in memory intrinsic. +cl::opt<unsigned> MemOPSizeLarge( + "memop-size-large", + cl::desc("Set large value thresthold in memory intrinsic size profiling. " + "Value of 0 disables the large value profiling."), + cl::init(8192)); + namespace { cl::opt<bool> DoNameCompression("enable-name-compression", @@ -77,17 +92,6 @@ cl::opt<double> NumCountersPerValueSite( // is usually smaller than 2. cl::init(1.0)); -cl::opt<std::string> MemOPSizeRange( - "memop-size-range", - cl::desc("Set the range of size in memory intrinsic calls to be profiled " - "precisely, in a format of <start_val>:<end_val>"), - cl::init("")); -cl::opt<unsigned> MemOPSizeLarge( - "memop-size-large", - cl::desc("Set large value thresthold in memory intrinsic size profiling. " - "Value of 0 disables the large value profiling."), - cl::init(8192)); - class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -176,7 +180,8 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { NamesSize = 0; ProfileDataMap.clear(); UsedVars.clear(); - getMemOPSizeOptions(); + getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart, + MemOPSizeRangeLast); // We did not know how many value sites there would be inside // the instrumented function. This is counting the number of instrumented @@ -299,12 +304,13 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Builder.getInt32(Index)}; Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args); } else { - Value *Args[6] = {Ind->getTargetValue(), - Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), - Builder.getInt32(Index), - Builder.getInt64(MemOPSizeRangeStart), - Builder.getInt64(MemOPSizeRangeLast), - Builder.getInt64(MemOPSizeLargeVal)}; + Value *Args[6] = { + Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index), + Builder.getInt64(MemOPSizeRangeStart), + Builder.getInt64(MemOPSizeRangeLast), + Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)}; Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args); } @@ -700,24 +706,3 @@ void InstrProfiling::emitInitialization() { appendToGlobalCtors(*M, F, 0); } - -void InstrProfiling::getMemOPSizeOptions() { - // Parse the value profile options. - MemOPSizeRangeStart = DefaultMemOPSizeRangeStart; - MemOPSizeRangeLast = DefaultMemOPSizeRangeLast; - if (!MemOPSizeRange.empty()) { - auto Pos = MemOPSizeRange.find(":"); - if (Pos != std::string::npos) { - if (Pos > 0) - MemOPSizeRangeStart = atoi(MemOPSizeRange.substr(0, Pos).c_str()); - if (Pos < MemOPSizeRange.size() - 1) - MemOPSizeRangeLast = atoi(MemOPSizeRange.substr(Pos + 1).c_str()); - } else - MemOPSizeRangeLast = atoi(MemOPSizeRange.c_str()); - } - assert(MemOPSizeRangeLast >= MemOPSizeRangeStart); - - MemOPSizeLargeVal = MemOPSizeLarge; - if (MemOPSizeLargeVal == 0) - MemOPSizeLargeVal = INT64_MIN; -} diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 2963d08752c..7bb62d2c845 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -63,6 +63,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializePGOInstrumentationGenLegacyPassPass(Registry); initializePGOInstrumentationUseLegacyPassPass(Registry); initializePGOIndirectCallPromotionLegacyPassPass(Registry); + initializePGOMemOPSizeOptLegacyPassPass(Registry); initializeInstrProfilingLegacyPassPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 7241a2a2e00..990bcec109d 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1038,21 +1038,6 @@ void PGOUseFunc::populateCounters() { DEBUG(FuncInfo.dumpInfo("after reading profile.")); } -static void setProfMetadata(Module *M, Instruction *TI, - ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { - MDBuilder MDB(M->getContext()); - assert(MaxCount > 0 && "Bad max count"); - uint64_t Scale = calculateCountScale(MaxCount); - SmallVector<unsigned, 4> Weights; - for (const auto &ECI : EdgeCounts) - Weights.push_back(scaleBranchCount(ECI, Scale)); - - DEBUG(dbgs() << "Weight is: "; - for (const auto &W : Weights) { dbgs() << W << " "; } - dbgs() << "\n";); - TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); -} - // Assign the scaled count values to the BB with multiple out edges. void PGOUseFunc::setBranchWeights() { // Generate MD_prof metadata for every branch instruction. @@ -1426,6 +1411,21 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { } namespace llvm { +void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts, + uint64_t MaxCount) { + MDBuilder MDB(M->getContext()); + assert(MaxCount > 0 && "Bad max count"); + uint64_t Scale = calculateCountScale(MaxCount); + SmallVector<unsigned, 4> Weights; + for (const auto &ECI : EdgeCounts) + Weights.push_back(scaleBranchCount(ECI, Scale)); + + DEBUG(dbgs() << "Weight is: "; + for (const auto &W : Weights) { dbgs() << W << " "; } + dbgs() << "\n";); + TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); +} + template <> struct GraphTraits<PGOUseFunc *> { typedef const BasicBlock *NodeRef; typedef succ_const_iterator ChildIteratorType; |