diff options
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 37 | ||||
-rw-r--r-- | llvm/test/Other/opt-O2-pipeline.ll | 1 | ||||
-rw-r--r-- | llvm/test/Other/opt-O3-pipeline.ll | 1 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/memop_clone.ll | 2 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/memop_size_opt.ll | 8 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/memop_size_opt_zero.ll | 4 |
6 files changed, 36 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 73d419aa6cc..2c71e75dadc 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -25,6 +25,8 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -112,6 +114,7 @@ private: AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } }; } // end anonymous namespace @@ -133,8 +136,8 @@ namespace { class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { public: MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) - : Func(Func), BFI(BFI), ORE(ORE), Changed(false) { + OptimizationRemarkEmitter &ORE, DominatorTree *DT) + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, @@ -170,6 +173,7 @@ private: Function &Func; BlockFrequencyInfo &BFI; OptimizationRemarkEmitter &ORE; + DominatorTree *DT; bool Changed; std::vector<MemIntrinsic *> WorkList; // Start of the previse range. @@ -336,15 +340,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { LLVM_DEBUG(dbgs() << *BB << "\n"); auto OrigBBFreq = BFI.getBlockFreq(BB); - BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock *DefaultBB = SplitBlock(BB, MI, DT); BasicBlock::iterator It(*MI); ++It; assert(It != DefaultBB->end()); - BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT); MergeBB->setName("MemOP.Merge"); BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); DefaultBB->setName("MemOP.Default"); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); BB->getTerminator()->eraseFromParent(); @@ -361,6 +366,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + std::vector<DominatorTree::UpdateType> Updates; + if (DT) + Updates.reserve(2 * SizeIds.size()); + for (uint64_t SizeId : SizeIds) { BasicBlock *CaseBB = BasicBlock::Create( Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); @@ -375,8 +384,15 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { IRBuilder<> IRBCase(CaseBB); IRBCase.CreateBr(MergeBB); SI->addCase(CaseSizeId, CaseBB); + if (DT) { + Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); + Updates.push_back({DominatorTree::Insert, BB, CaseBB}); + } LLVM_DEBUG(dbgs() << *CaseBB << "\n"); } + DTU.applyUpdates(Updates); + Updates.clear(); + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); LLVM_DEBUG(dbgs() << *BB << "\n"); @@ -397,13 +413,14 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { } // namespace static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) { + OptimizationRemarkEmitter &ORE, + DominatorTree *DT) { if (DisableMemOPOPT) return false; if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE); + MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT); MemOPSizeOpt.perform(); return MemOPSizeOpt.isChanged(); } @@ -412,7 +429,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { BlockFrequencyInfo &BFI = getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - return PGOMemOPSizeOptImpl(F, BFI, ORE); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + return PGOMemOPSizeOptImpl(F, BFI, ORE, DT); } namespace llvm { @@ -422,11 +441,13 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, FunctionAnalysisManager &FAM) { auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE); + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT); if (!Changed) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); return PA; } } // namespace llvm diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 0aba51e0c66..2ebb6ed909f 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -80,7 +80,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: PGOMemOPSize -; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 442de37baef..d9ffc96d434 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -84,7 +84,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: PGOMemOPSize -; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information diff --git a/llvm/test/Transforms/PGOProfile/memop_clone.ll b/llvm/test/Transforms/PGOProfile/memop_clone.ll index 4b12e7c5b57..292dc10b5e8 100644 --- a/llvm/test/Transforms/PGOProfile/memop_clone.ll +++ b/llvm/test/Transforms/PGOProfile/memop_clone.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -pgo-memop-opt -S | FileCheck %s +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -S | FileCheck %s define i32 @test(i8* %a, i8* %b) !prof !1 { ; CHECK_LABEL: test diff --git a/llvm/test/Transforms/PGOProfile/memop_size_opt.ll b/llvm/test/Transforms/PGOProfile/memop_size_opt.ll index 4a2487a0b09..69cdd81ddf7 100644 --- a/llvm/test/Transforms/PGOProfile/memop_size_opt.ll +++ b/llvm/test/Transforms/PGOProfile/memop_size_opt.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML -; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML diff --git a/llvm/test/Transforms/PGOProfile/memop_size_opt_zero.ll b/llvm/test/Transforms/PGOProfile/memop_size_opt_zero.ll index de48b622e46..43ca710534e 100644 --- a/llvm/test/Transforms/PGOProfile/memop_size_opt_zero.ll +++ b/llvm/test/Transforms/PGOProfile/memop_size_opt_zero.ll @@ -1,7 +1,7 @@ ; Test to ensure the pgo memop optimization pass doesn't try to scale ; up a value profile with a 0 count, which would lead to divide by 0. -; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" |