-rw-r--r--  llvm/include/llvm/InitializePasses.h            |    1
-rw-r--r--  llvm/include/llvm/LinkAllPasses.h               |    1
-rw-r--r--  llvm/include/llvm/Transforms/Scalar.h           |    7
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/GVN.h       |   15
-rw-r--r--  llvm/lib/Passes/PassRegistry.def                |    1
-rw-r--r--  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp  |    1
-rw-r--r--  llvm/lib/Transforms/Scalar/CMakeLists.txt       |    1
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNHoist.cpp         |  820
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalar.cpp           |    5
-rw-r--r--  llvm/test/Transforms/GVN/hoist-pr20242.ll       |   74
-rw-r--r--  llvm/test/Transforms/GVN/hoist-pr22005.ll       |   30
-rw-r--r--  llvm/test/Transforms/GVN/hoist.ll               |  691
12 files changed, 1643 insertions(+), 4 deletions(-)
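Before the patch body, for orientation: a minimal sketch, not part of the commit, of the transformation the new pass performs, written in the style of the tests added below (the function @sketch and its CHECK lines are hypothetical). The fadd is computed in both branches with the same operands, so gvn-hoist moves a single copy into the common dominator:

; RUN: opt -gvn-hoist -S < %s | FileCheck %s
; CHECK-LABEL: @sketch
; CHECK: fadd
; CHECK-NOT: fadd
define float @sketch(float %a, float %b, i1 %c) {
entry:
  br i1 %c, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  %x = fadd float %a, %b
  br label %if.end

if.else:                                          ; preds = %entry
  %y = fadd float %a, %b
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %r = phi float [ %x, %if.then ], [ %y, %if.else ]
  ret float %r
}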
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 34b156e8270..6f8aae73696 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -120,6 +120,7 @@ void initializeEarlyIfConverterPass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeEfficiencySanitizerPass(PassRegistry&); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &); +void initializeGVNHoistLegacyPassPass(PassRegistry &); void initializeExpandISelPseudosPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExternalAAWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index a12913e9999..b2721d0a1fd 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -160,6 +160,7 @@ namespace { (void) llvm::createConstantHoistingPass(); (void) llvm::createCodeGenPreparePass(); (void) llvm::createEarlyCSEPass(); + (void) llvm::createGVNHoistPass(); (void) llvm::createMergedLoadStoreMotionPass(); (void) llvm::createGVNPass(); (void) llvm::createMemCpyOptPass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 040e95c0ebd..167cc94ec81 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -326,6 +326,13 @@ FunctionPass *createEarlyCSEPass(); //===----------------------------------------------------------------------===// // +// GVNHoist - This pass performs a simple and fast GVN pass over the dominator +// tree to hoist common expressions from sibling branches. +// +FunctionPass *createGVNHoistPass(); + +//===----------------------------------------------------------------------===// +// // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads // are hoisted into the header, while stores sink into the footer. // diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index 9a53ca3154e..3bb5ec39227 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -58,11 +58,7 @@ public: AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } MemoryDependenceResults &getMemDep() const { return *MD; } -private: - friend class gvn::GVNLegacyPass; - struct Expression; - friend struct DenseMapInfo<Expression>; /// This class holds the mapping between values and value numbers. It is used /// as an efficient mechanism to determine the expression-wise equivalence of @@ -104,6 +100,10 @@ private: void verifyRemoved(const Value *) const; }; +private: + friend class gvn::GVNLegacyPass; + friend struct DenseMapInfo<Expression>; + MemoryDependenceResults *MD; DominatorTree *DT; const TargetLibraryInfo *TLI; @@ -228,6 +228,13 @@ private: /// loads are eliminated by the pass. FunctionPass *createGVNPass(bool NoLoads = false); +/// \brief A simple and fast domtree-based GVN pass to hoist common expressions +/// from sibling branches. +struct GVNHoistPass : PassInfoMixin<GVNHoistPass> { + /// \brief Run the pass over the function. 
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+};
+
 }

 #endif
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index ffa0864edc2..c5f9a3cb92e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -133,6 +133,7 @@ FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
 FUNCTION_PASS("dce", DCEPass())
 FUNCTION_PASS("dse", DSEPass())
 FUNCTION_PASS("early-cse", EarlyCSEPass())
+FUNCTION_PASS("gvn-hoist", GVNHoistPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("instsimplify", InstSimplifierPass())
 FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index e283d26f5c1..c6feb133bd1 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -223,6 +223,7 @@ void PassManagerBuilder::populateFunctionPassManager(
   FPM.add(createCFGSimplificationPass());
   FPM.add(createSROAPass());
   FPM.add(createEarlyCSEPass());
+  FPM.add(createGVNHoistPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ac162039037..9f04344b8b0 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_library(LLVMScalarOpts
   Float2Int.cpp
   GuardWidening.cpp
   GVN.cpp
+  GVNHoist.cpp
   InductiveRangeCheckElimination.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
new file mode 100644
index 00000000000..c0fd9e2d5f2
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -0,0 +1,820 @@
+//===- GVNHoist.cpp - Hoist scalar and load expressions ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass hoists expressions from branches to a common dominator. It uses
+// GVN (global value numbering) to discover expressions that compute the same
+// values. The primary goal is to reduce code size; in some cases the pass
+// also shortens the critical path by exposing more ILP.
+// Hoisting may hurt performance in some cases. To mitigate that, hoisting is
+// disabled in the following cases:
+// 1. Scalars across calls.
+// 2. geps when the corresponding load/store cannot be hoisted.
+//===----------------------------------------------------------------------===//

+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <functional>
+#include <unordered_map>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-hoist"
+
+STATISTIC(NumHoisted, "Number of instructions hoisted");
+STATISTIC(NumRemoved, "Number of instructions removed");
+STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
+STATISTIC(NumLoadsRemoved, "Number of loads removed");
+STATISTIC(NumStoresHoisted, "Number of stores hoisted");
+STATISTIC(NumStoresRemoved, "Number of stores removed");
+STATISTIC(NumCallsHoisted, "Number of calls hoisted");
+STATISTIC(NumCallsRemoved, "Number of calls removed");
+
+static cl::opt<int>
+    MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
+                        cl::desc("Max number of instructions to hoist "
+                                 "(default unlimited = -1)"));
+static cl::opt<int> MaxNumberOfBBSInPath(
+    "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
+    cl::desc("Max number of basic blocks on the path between "
+             "hoisting locations (default = 4, unlimited = -1)"));
+
+static int HoistedCtr = 0;
+
+namespace {
+
+// Provides a sorting function based on the execution order of two instructions.
+struct SortByDFSIn {
+private:
+  DenseMap<const BasicBlock *, unsigned> &DFSNumber;
+
+public:
+  SortByDFSIn(DenseMap<const BasicBlock *, unsigned> &D) : DFSNumber(D) {}
+
+  // Returns true when A executes before B.
+  bool operator()(const Instruction *A, const Instruction *B) const {
+    assert(A != B);
+    const BasicBlock *BA = A->getParent();
+    const BasicBlock *BB = B->getParent();
+    unsigned NA = DFSNumber[BA];
+    unsigned NB = DFSNumber[BB];
+    if (NA < NB)
+      return true;
+    if (NA == NB) {
+      // Sort them in the order they occur in the same basic block.
+      for (const Instruction &I : *BA) {
+        if (&I == A)
+          return true;
+        if (&I == B)
+          return false;
+      }
+      llvm_unreachable("A and B not found in their parent block");
+    }
+    return false;
+  }
+};
+
+// A map from a VN (value number) to all the instructions with that VN.
+typedef DenseMap<unsigned, SmallVector<Instruction *, 4>> VNtoInsns;
+
+// Records all scalar instructions that are candidates for code hoisting.
+class InsnInfo {
+  VNtoInsns VNtoScalars;
+
+public:
+  // Inserts I and its value number in VNtoScalars.
+  void insert(Instruction *I, GVN::ValueTable &VN) {
+    // Scalar instruction.
+    unsigned V = VN.lookupOrAdd(I);
+    VNtoScalars[V].push_back(I);
+  }
+
+  const VNtoInsns &getVNTable() const { return VNtoScalars; }
+};
+
+// Records all load instructions that are candidates for code hoisting.
+class LoadInfo {
+  VNtoInsns VNtoLoads;
+
+public:
+  // Insert Load and the value number of its memory address in VNtoLoads.
+  void insert(LoadInst *Load, GVN::ValueTable &VN) {
+    if (Load->isSimple()) {
+      unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
+      VNtoLoads[V].push_back(Load);
+    }
+  }
+
+  const VNtoInsns &getVNTable() const { return VNtoLoads; }
+};
+
+// Records all store instructions that are candidates for code hoisting.
+class StoreInfo {
+  VNtoInsns VNtoStores;
+
+public:
+  // Insert the Store and a hash number of the store address and the stored
+  // value in VNtoStores.
+  void insert(StoreInst *Store, GVN::ValueTable &VN) {
+    if (!Store->isSimple())
+      return;
+    // Hash the store address and the stored value.
+    Value *Ptr = Store->getPointerOperand();
+    Value *Val = Store->getValueOperand();
+    VNtoStores[hash_combine(VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val))]
+        .push_back(Store);
+  }
+
+  const VNtoInsns &getVNTable() const { return VNtoStores; }
+};
+
+// Records all call instructions that are candidates for code hoisting.
+class CallInfo {
+  VNtoInsns VNtoCallsScalars;
+  VNtoInsns VNtoCallsLoads;
+  VNtoInsns VNtoCallsStores;
+
+public:
+  // Insert Call and its value numbering in one of the VNtoCalls* containers.
+  void insert(CallInst *Call, GVN::ValueTable &VN) {
+    // A call that doesNotAccessMemory is handled as a Scalar, a call that
+    // onlyReadsMemory is handled as a Load, and all other calls are handled
+    // as a Store.
+    unsigned V = VN.lookupOrAdd(Call);
+
+    if (Call->doesNotAccessMemory())
+      VNtoCallsScalars[V].push_back(Call);
+    else if (Call->onlyReadsMemory())
+      VNtoCallsLoads[V].push_back(Call);
+    else
+      VNtoCallsStores[V].push_back(Call);
+  }
+
+  const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
+
+  const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
+
+  const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
+};
+
+typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet;
+typedef SmallVector<Instruction *, 4> SmallVecInsn;
+typedef SmallVectorImpl<Instruction *> SmallVecImplInsn;
+
+// This pass hoists common computations across branches sharing a common
+// dominator. The primary goal is to reduce code size; in some cases the
+// pass also shortens the critical path by exposing more ILP.
+class GVNHoist {
+public:
+  GVN::ValueTable VN;
+  DominatorTree *DT;
+  AliasAnalysis *AA;
+  MemoryDependenceResults *MD;
+  const bool OptForMinSize;
+  DenseMap<const BasicBlock *, unsigned> DFSNumber;
+  BBSideEffectsSet BBSideEffects;
+  MemorySSA *MSSA;
+  enum InsKind { Unknown, Scalar, Load, Store };
+
+  GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
+           bool OptForMinSize)
+      : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize) {}
+
+  // Return true when there is exception handling in BB.
+  bool hasEH(const BasicBlock *BB) {
+    auto It = BBSideEffects.find(BB);
+    if (It != BBSideEffects.end())
+      return It->second;
+
+    if (BB->isEHPad() || BB->hasAddressTaken()) {
+      BBSideEffects[BB] = true;
+      return true;
+    }
+
+    if (BB->getTerminator()->mayThrow()) {
+      BBSideEffects[BB] = true;
+      return true;
+    }
+
+    BBSideEffects[BB] = false;
+    return false;
+  }
+
+  // Return true when all paths from A to the end of the function pass through
+  // either B or C.
+  bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
+                            const BasicBlock *C) {
+    // Keep B and C in the work list WL and remove them as they are visited.
+    SmallPtrSet<const BasicBlock *, 2> WL;
+    WL.insert(B);
+    WL.insert(C);
+
+    for (auto It = df_begin(A), E = df_end(A); It != E;) {
+      // There exists a path from A to the exit of the function if we are
+      // still iterating in DF traversal and we removed all blocks from the
+      // work list.
+      if (WL.empty())
+        return false;
+
+      const BasicBlock *BB = *It;
+      if (WL.erase(BB)) {
+        // Stop DFS traversal when BB is in the work list.
+        It.skipChildren();
+        continue;
+      }
+
+      // Check for end of function, calls that do not return, etc.
+      if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
+        return false;
+
+      // Increment DFS traversal when not skipping children.
+      ++It;
+    }
+
+    return true;
+  }
+
+  // Return true when I1 appears before I2 in the instructions of BB.
+  bool firstInBB(BasicBlock *BB, const Instruction *I1, const Instruction *I2) {
+    for (Instruction &I : *BB) {
+      if (&I == I1)
+        return true;
+      if (&I == I2)
+        return false;
+    }
+
+    llvm_unreachable("I1 and I2 not found in BB");
+  }
+
+  // Return true when there are users of Def in BB.
+  bool hasMemoryUseOnPath(MemoryAccess *Def, const BasicBlock *BB,
+                          const Instruction *OldPt) {
+    Value::user_iterator UI = Def->user_begin();
+    Value::user_iterator UE = Def->user_end();
+    const BasicBlock *DefBB = Def->getBlock();
+    const BasicBlock *OldBB = OldPt->getParent();
+
+    for (; UI != UE; ++UI)
+      if (MemoryUse *U = dyn_cast<MemoryUse>(*UI)) {
+        BasicBlock *UBB = U->getBlock();
+        // Only analyze uses in BB.
+        if (BB != UBB)
+          continue;
+
+        // A use in the same block as the Def is on the path.
+        if (UBB == DefBB) {
+          assert(MSSA->locallyDominates(Def, U) && "def not dominating use");
+          return true;
+        }
+
+        if (UBB != OldBB)
+          return true;
+
+        // It is only harmful to hoist when the use is before OldPt.
+        if (firstInBB(UBB, U->getMemoryInst(), OldPt))
+          return true;
+      }
+
+    return false;
+  }
+
+  // Return true when there is exception handling or a load of memory Def
+  // between OldPt and NewPt. Decrement NBBsOnAllPaths by 1 for each block
+  // between OldPt and NewPt, and return true when the counter reaches 0,
+  // except when it is initialized to -1 which means unlimited.
+  bool hasEHOrLoadsOnPath(const Instruction *NewPt, const Instruction *OldPt,
+                          MemoryAccess *Def, int &NBBsOnAllPaths) {
+    const BasicBlock *NewBB = NewPt->getParent();
+    const BasicBlock *OldBB = OldPt->getParent();
+    assert(DT->dominates(NewBB, OldBB) && "invalid path");
+    assert(DT->dominates(Def->getBlock(), NewBB) &&
+           "def does not dominate new hoisting point");
+
+    // Walk all basic blocks reachable in depth-first iteration on the inverse
+    // CFG from OldBB to NewBB. These blocks are all the blocks that may be
+    // executed between the execution of NewBB and OldBB. Hoisting an
+    // expression from OldBB into NewBB has to be safe on all execution paths.
+    for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
+      if (*I == NewBB) {
+        // Stop traversal when reaching NewBB.
+        I.skipChildren();
+        continue;
+      }
+
+      // Impossible to hoist with exceptions on the path.
+      if (hasEH(*I))
+        return true;
+
+      // Check that we do not move a store past loads.
+      if (hasMemoryUseOnPath(Def, *I, OldPt))
+        return true;
+
+      // Stop walk once the limit is reached.
+      if (NBBsOnAllPaths == 0)
+        return true;
+
+      // -1 is unlimited number of blocks on all paths.
+      if (NBBsOnAllPaths != -1)
+        --NBBsOnAllPaths;
+
+      ++I;
+    }
+
+    return false;
+  }
+
+  // Return true when there is exception handling between HoistPt and BB.
+  // Decrement NBBsOnAllPaths by 1 for each block between HoistPt and BB, and
+  // return true when the counter reaches 0, except when it is initialized to
+  // -1 which means unlimited.
+  bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB,
+                   int &NBBsOnAllPaths) {
+    assert(DT->dominates(HoistPt, BB) && "Invalid path");
+
+    // Walk all basic blocks reachable in depth-first iteration on the inverse
+    // CFG from BB to HoistPt. These blocks are all the blocks that may be
+    // executed between the execution of HoistPt and BB. Hoisting an
+    // expression from BB into HoistPt has to be safe on all execution paths.
+    for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) {
+      if (*I == HoistPt) {
+        // Stop traversal when reaching HoistPt.
+        I.skipChildren();
+        continue;
+      }
+
+      // Impossible to hoist with exceptions on the path.
+      if (hasEH(*I))
+        return true;
+
+      // Stop walk once the limit is reached.
+      if (NBBsOnAllPaths == 0)
+        return true;
+
+      // -1 is unlimited number of blocks on all paths.
+      if (NBBsOnAllPaths != -1)
+        --NBBsOnAllPaths;
+
+      ++I;
+    }
+
+    return false;
+  }
+
+  // Return true when it is safe to hoist a memory load or store U from OldPt
+  // to NewPt.
+  bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
+                       MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
+    // In-place hoisting is safe.
+    if (NewPt == OldPt)
+      return true;
+
+    const BasicBlock *NewBB = NewPt->getParent();
+    const BasicBlock *OldBB = OldPt->getParent();
+    const BasicBlock *UBB = U->getBlock();
+
+    // Check for dependences on MemorySSA.
+    MemoryAccess *D = U->getDefiningAccess();
+    BasicBlock *DBB = D->getBlock();
+    if (DT->properlyDominates(NewBB, DBB))
+      // Cannot move the load or store to NewBB above its definition in DBB.
+      return false;
+
+    if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
+      if (MemoryUseOrDef *UD = dyn_cast<MemoryUseOrDef>(D))
+        if (firstInBB(DBB, NewPt, UD->getMemoryInst()))
+          // Cannot move the load or store to NewPt above its definition in D.
+          return false;
+
+    // Check for unsafe hoistings due to side effects.
+    if (K == InsKind::Store) {
+      if (hasEHOrLoadsOnPath(NewPt, OldPt, D, NBBsOnAllPaths))
+        return false;
+    } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
+      return false;
+
+    if (UBB == NewBB) {
+      if (DT->properlyDominates(DBB, NewBB))
+        return true;
+      assert(UBB == DBB);
+      assert(MSSA->locallyDominates(D, U));
+    }
+
+    // No side effects: it is safe to hoist.
+    return true;
+  }
+
+  // Return true when it is safe to hoist scalar instructions from BB1 and BB2
+  // to HoistBB.
+  bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1,
+                         const BasicBlock *BB2, int &NBBsOnAllPaths) {
+    // Check that the hoisted expression is needed on all paths. When HoistBB
+    // already contains an instruction to be hoisted, the expression is needed
+    // on all paths. Enable scalar hoisting at -Oz as it is safe to hoist
+    // scalars to a place where they are partially needed.
+    if (!OptForMinSize && BB1 != HoistBB &&
+        !hoistingFromAllPaths(HoistBB, BB1, BB2))
+      return false;
+
+    if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) ||
+        hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths))
+      return false;
+
+    // Safe to hoist scalars from BB1 and BB2 to HoistBB.
+    return true;
+  }
+
+  // Each element of a hoisting list contains the basic block into which to
+  // hoist and a list of instructions to be hoisted.
+  typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo;
+  typedef SmallVector<HoistingPointInfo, 4> HoistingPointList;
+
+  // Partition InstructionsToHoist into a set of candidates which can share a
+  // common hoisting point. The partitions are collected in HPL. The kind K
+  // tells whether the instructions in InstructionsToHoist are scalars, loads,
+  // or stores.
+  void partitionCandidates(SmallVecImplInsn &InstructionsToHoist,
+                           HoistingPointList &HPL, InsKind K) {
+    // No need to sort for at most two instructions.
+ if (InstructionsToHoist.size() > 2) { + SortByDFSIn Pred(DFSNumber); + std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); + } + + int NBBsOnAllPaths = MaxNumberOfBBSInPath; + + SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); + SmallVecImplInsn::iterator Start = II; + Instruction *HoistPt = *II; + BasicBlock *HoistBB = HoistPt->getParent(); + MemoryUseOrDef *UD; + if (K != InsKind::Scalar) + UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(HoistPt)); + + for (++II; II != InstructionsToHoist.end(); ++II) { + Instruction *Insn = *II; + BasicBlock *BB = Insn->getParent(); + BasicBlock *NewHoistBB; + Instruction *NewHoistPt; + + if (BB == HoistBB) { + NewHoistBB = HoistBB; + NewHoistPt = firstInBB(BB, Insn, HoistPt) ? Insn : HoistPt; + } else { + NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB); + if (NewHoistBB == BB) + NewHoistPt = Insn; + else if (NewHoistBB == HoistBB) + NewHoistPt = HoistPt; + else + NewHoistPt = NewHoistBB->getTerminator(); + } + + if (K == InsKind::Scalar) { + if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) { + // Extend HoistPt to NewHoistPt. + HoistPt = NewHoistPt; + HoistBB = NewHoistBB; + continue; + } + } else { + // When NewBB already contains an instruction to be hoisted, the + // expression is needed on all paths. + // Check that the hoisted expression is needed on all paths: it is + // unsafe to hoist loads to a place where there may be a path not + // loading from the same address: for instance there may be a branch on + // which the address of the load may not be initialized. + if ((HoistBB == NewHoistBB || BB == NewHoistBB || + hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) && + // Also check that it is safe to move the load or store from HoistPt + // to NewHoistPt, and from Insn to NewHoistPt. + safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) && + safeToHoistLdSt(NewHoistPt, Insn, + cast<MemoryUseOrDef>(MSSA->getMemoryAccess(Insn)), + K, NBBsOnAllPaths)) { + // Extend HoistPt to NewHoistPt. + HoistPt = NewHoistPt; + HoistBB = NewHoistBB; + continue; + } + } + + // At this point it is not safe to extend the current hoisting to + // NewHoistPt: save the hoisting list so far. + if (std::distance(Start, II) > 1) + HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II))); + + // Start over from BB. + Start = II; + if (K != InsKind::Scalar) + UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(*Start)); + HoistPt = Insn; + HoistBB = BB; + NBBsOnAllPaths = MaxNumberOfBBSInPath; + } + + // Save the last partition. + if (std::distance(Start, II) > 1) + HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II))); + } + + // Initialize HPL from Map. + void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K) { + for (VNtoInsns::const_iterator It = Map.begin(); It != Map.end(); ++It) { + if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold) + return; + + const SmallVecInsn &V = It->second; + if (V.size() < 2) + continue; + + // Compute the insertion point and the list of expressions to be hoisted. + SmallVecInsn InstructionsToHoist; + for (auto I : V) + if (!hasEH(I->getParent())) + InstructionsToHoist.push_back(I); + + if (InstructionsToHoist.size()) + partitionCandidates(InstructionsToHoist, HPL, K); + } + } + + // Return true when all operands of Instr are available at insertion point + // HoistPt. When limiting the number of hoisted expressions, one could hoist + // a load without hoisting its access function. 
So before hoisting any + // expression, make sure that all its operands are available at insert point. + bool allOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const Value *Op = I->getOperand(i); + const Instruction *Inst = dyn_cast<Instruction>(Op); + if (Inst && !DT->dominates(Inst->getParent(), HoistPt)) + return false; + } + + return true; + } + + Instruction *firstOfTwo(Instruction *I, Instruction *J) const { + for (Instruction &I1 : *I->getParent()) + if (&I1 == I || &I1 == J) + return &I1; + llvm_unreachable("Both I and J must be from same BB"); + } + + // Replace the use of From with To in Insn. + void replaceUseWith(Instruction *Insn, Value *From, Value *To) const { + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + if (U.getUser() == Insn) { + U.set(To); + return; + } + } + llvm_unreachable("should replace exactly once"); + } + + bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const { + // Check whether the GEP of a ld/st can be synthesized at HoistPt. + Instruction *Gep = nullptr; + Instruction *Val = nullptr; + if (LoadInst *Ld = dyn_cast<LoadInst>(Repl)) + Gep = dyn_cast<Instruction>(Ld->getPointerOperand()); + if (StoreInst *St = dyn_cast<StoreInst>(Repl)) { + Gep = dyn_cast<Instruction>(St->getPointerOperand()); + Val = dyn_cast<Instruction>(St->getValueOperand()); + } + + if (!Gep || !isa<GetElementPtrInst>(Gep)) + return false; + + // Check whether we can compute the Gep at HoistPt. + if (!allOperandsAvailable(Gep, HoistPt)) + return false; + + // Also check that the stored value is available. + if (Val && !allOperandsAvailable(Val, HoistPt)) + return false; + + // Copy the gep before moving the ld/st. + Instruction *ClonedGep = Gep->clone(); + ClonedGep->insertBefore(HoistPt->getTerminator()); + replaceUseWith(Repl, Gep, ClonedGep); + + // Also copy Val when it is a gep: geps are not hoisted by default. + if (Val && isa<GetElementPtrInst>(Val)) { + Instruction *ClonedVal = Val->clone(); + ClonedVal->insertBefore(HoistPt->getTerminator()); + replaceUseWith(Repl, Val, ClonedVal); + } + + return true; + } + + std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) { + unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; + for (const HoistingPointInfo &HP : HPL) { + // Find out whether we already have one of the instructions in HoistPt, + // in which case we do not have to move it. + BasicBlock *HoistPt = HP.first; + const SmallVecInsn &InstructionsToHoist = HP.second; + Instruction *Repl = nullptr; + for (Instruction *I : InstructionsToHoist) + if (I->getParent() == HoistPt) { + // If there are two instructions in HoistPt to be hoisted in place: + // update Repl to be the first one, such that we can rename the uses + // of the second based on the first. + Repl = !Repl ? I : firstOfTwo(Repl, I); + } + + if (Repl) { + // Repl is already in HoistPt: it remains in place. + assert(allOperandsAvailable(Repl, HoistPt) && + "instruction depends on operands that are not available"); + } else { + // When we do not find Repl in HoistPt, select the first in the list + // and move it to HoistPt. + Repl = InstructionsToHoist.front(); + + // We can move Repl in HoistPt only when all operands are available. + // The order in which hoistings are done may influence the availability + // of operands. 
+ if (!allOperandsAvailable(Repl, HoistPt) && + !makeOperandsAvailable(Repl, HoistPt)) + continue; + Repl->moveBefore(HoistPt->getTerminator()); + } + + if (isa<LoadInst>(Repl)) + ++NL; + else if (isa<StoreInst>(Repl)) + ++NS; + else if (isa<CallInst>(Repl)) + ++NC; + else // Scalar + ++NI; + + // Remove and rename all other instructions. + for (Instruction *I : InstructionsToHoist) + if (I != Repl) { + ++NR; + if (isa<LoadInst>(Repl)) + ++NumLoadsRemoved; + else if (isa<StoreInst>(Repl)) + ++NumStoresRemoved; + else if (isa<CallInst>(Repl)) + ++NumCallsRemoved; + I->replaceAllUsesWith(Repl); + I->eraseFromParent(); + } + } + + NumHoisted += NL + NS + NC + NI; + NumRemoved += NR; + NumLoadsHoisted += NL; + NumStoresHoisted += NS; + NumCallsHoisted += NC; + return {NI, NL + NC + NS}; + } + + // Hoist all expressions. Returns Number of scalars hoisted + // and number of non-scalars hoisted. + std::pair<unsigned, unsigned> hoistExpressions(Function &F) { + InsnInfo II; + LoadInfo LI; + StoreInfo SI; + CallInfo CI; + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { + for (Instruction &I1 : *BB) { + if (LoadInst *Load = dyn_cast<LoadInst>(&I1)) + LI.insert(Load, VN); + else if (StoreInst *Store = dyn_cast<StoreInst>(&I1)) + SI.insert(Store, VN); + else if (CallInst *Call = dyn_cast<CallInst>(&I1)) { + if (IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call)) { + if (isa<DbgInfoIntrinsic>(Intr) || + Intr->getIntrinsicID() == Intrinsic::assume) + continue; + } + if (Call->mayHaveSideEffects()) { + if (!OptForMinSize) + break; + // We may continue hoisting across calls which write to memory. + if (Call->mayThrow()) + break; + } + CI.insert(Call, VN); + } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1)) + // Do not hoist scalars past calls that may write to memory because + // that could result in spills later. geps are handled separately. + // TODO: We can relax this for targets like AArch64 as they have more + // registers than X86. + II.insert(&I1, VN); + } + } + + HoistingPointList HPL; + computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); + computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); + computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); + computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); + computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); + computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); + return hoist(HPL); + } + + bool run(Function &F) { + VN.setDomTree(DT); + VN.setAliasAnalysis(AA); + VN.setMemDep(MD); + bool Res = false; + + unsigned I = 0; + for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) + DFSNumber.insert(std::make_pair(BB, ++I)); + + // FIXME: use lazy evaluation of VN to avoid the fix-point computation. + while (1) { + // FIXME: only compute MemorySSA once. We need to update the analysis in + // the same time as transforming the code. + MemorySSA M(F, AA, DT); + MSSA = &M; + + auto HoistStat = hoistExpressions(F); + if (HoistStat.first + HoistStat.second == 0) { + return Res; + } + if (HoistStat.second > 0) { + // To address a limitation of the current GVN, we need to rerun the + // hoisting after we hoisted loads in order to be able to hoist all + // scalars dependent on the hoisted loads. Same for stores. 
+ VN.clear(); + } + Res = true; + } + + return Res; + } +}; + +class GVNHoistLegacyPass : public FunctionPass { +public: + static char ID; + + GVNHoistLegacyPass() : FunctionPass(ID) { + initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); + + GVNHoist G(&DT, &AA, &MD, F.optForMinSize()); + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<MemoryDependenceWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } +}; +} // namespace + +PreservedAnalyses GVNHoistPass::run(Function &F, + AnalysisManager<Function> &AM) { + DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); + AliasAnalysis &AA = AM.getResult<AAManager>(F); + MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F); + + GVNHoist G(&DT, &AA, &MD, F.optForMinSize()); + if (!G.run(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} + +char GVNHoistLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist", + "Early GVN Hoisting of Expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist", + "Early GVN Hoisting of Expressions", false, false) + +FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); } diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 13610fa55e4..e57fda31f24 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -44,6 +44,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeGuardWideningLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry); + initializeGVNHoistLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyLegacyPassPass(Registry); @@ -236,6 +237,10 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createEarlyCSEPass()); } +void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGVNHoistPass()); +} + void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createTypeBasedAAWrapperPass()); } diff --git a/llvm/test/Transforms/GVN/hoist-pr20242.ll b/llvm/test/Transforms/GVN/hoist-pr20242.ll new file mode 100644 index 00000000000..b91f18a5cd6 --- /dev/null +++ b/llvm/test/Transforms/GVN/hoist-pr20242.ll @@ -0,0 +1,74 @@ +; RUN: opt -gvn-hoist -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check that all "or" expressions are hoisted. 
+; CHECK-LABEL: @encode +; CHECK: or i32 +; CHECK-NOT: or i32 + +define i8* @encode(i8* %p, i32 %v) { +entry: + %p.addr = alloca i8*, align 8 + %v.addr = alloca i32, align 4 + store i8* %p, i8** %p.addr, align 8 + store i32 %v, i32* %v.addr, align 4 + %0 = load i32, i32* %v.addr, align 4 + %cmp = icmp ult i32 %0, 23 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %1 = load i32, i32* %v.addr, align 4 + %or = or i32 %1, 128 + %conv = trunc i32 %or to i8 + %2 = load i8*, i8** %p.addr, align 8 + %incdec.ptr = getelementptr inbounds i8, i8* %2, i32 1 + store i8* %incdec.ptr, i8** %p.addr, align 8 + store i8 %conv, i8* %2, align 1 + br label %if.end15 + +if.else: ; preds = %entry + %3 = load i32, i32* %v.addr, align 4 + %cmp1 = icmp ult i32 %3, 42 + br i1 %cmp1, label %if.then3, label %if.else9 + +if.then3: ; preds = %if.else + %4 = load i32, i32* %v.addr, align 4 + %or4 = or i32 %4, 128 + %conv5 = trunc i32 %or4 to i8 + %5 = load i8*, i8** %p.addr, align 8 + %incdec.ptr6 = getelementptr inbounds i8, i8* %5, i32 1 + store i8* %incdec.ptr6, i8** %p.addr, align 8 + store i8 %conv5, i8* %5, align 1 + %6 = load i32, i32* %v.addr, align 4 + %conv7 = trunc i32 %6 to i8 + %7 = load i8*, i8** %p.addr, align 8 + %incdec.ptr8 = getelementptr inbounds i8, i8* %7, i32 1 + store i8* %incdec.ptr8, i8** %p.addr, align 8 + store i8 %conv7, i8* %7, align 1 + br label %if.end + +if.else9: ; preds = %if.else + %8 = load i32, i32* %v.addr, align 4 + %or10 = or i32 %8, 128 + %conv11 = trunc i32 %or10 to i8 + %9 = load i8*, i8** %p.addr, align 8 + %incdec.ptr12 = getelementptr inbounds i8, i8* %9, i32 1 + store i8* %incdec.ptr12, i8** %p.addr, align 8 + store i8 %conv11, i8* %9, align 1 + %10 = load i32, i32* %v.addr, align 4 + %shr = lshr i32 %10, 7 + %conv13 = trunc i32 %shr to i8 + %11 = load i8*, i8** %p.addr, align 8 + %incdec.ptr14 = getelementptr inbounds i8, i8* %11, i32 1 + store i8* %incdec.ptr14, i8** %p.addr, align 8 + store i8 %conv13, i8* %11, align 1 + br label %if.end + +if.end: ; preds = %if.else9, %if.then3 + br label %if.end15 + +if.end15: ; preds = %if.end, %if.then + %12 = load i8*, i8** %p.addr, align 8 + ret i8* %12 +} diff --git a/llvm/test/Transforms/GVN/hoist-pr22005.ll b/llvm/test/Transforms/GVN/hoist-pr22005.ll new file mode 100644 index 00000000000..9299f4f48e5 --- /dev/null +++ b/llvm/test/Transforms/GVN/hoist-pr22005.ll @@ -0,0 +1,30 @@ +; RUN: opt -gvn-hoist -S < %s | FileCheck %s +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check that all "sub" expressions are hoisted. 
+; CHECK-LABEL: @fun +; CHECK: sub i64 +; CHECK-NOT: sub i64 + +define i64 @fun(i8* %out, i8* %end) { + %1 = icmp ult i8* %out, %end + br i1 %1, label %2, label %6 + +; <label>:2 ; preds = %0 + %3 = ptrtoint i8* %end to i64 + %4 = ptrtoint i8* %out to i64 + %5 = sub i64 %3, %4 + br label %10 + +; <label>:6 ; preds = %0 + %7 = ptrtoint i8* %out to i64 + %8 = ptrtoint i8* %end to i64 + %9 = sub i64 %8, %7 + br label %10 + +; <label>:10 ; preds = %6, %2 + %.in = phi i64 [ %5, %2 ], [ %9, %6 ] + %11 = add i64 %.in, 257 + ret i64 %11 +} diff --git a/llvm/test/Transforms/GVN/hoist.ll b/llvm/test/Transforms/GVN/hoist.ll new file mode 100644 index 00000000000..375dd9193f7 --- /dev/null +++ b/llvm/test/Transforms/GVN/hoist.ll @@ -0,0 +1,691 @@ +; RUN: opt -gvn-hoist -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@GlobalVar = internal global float 1.000000e+00 + +; Check that all scalar expressions are hoisted. +; +; CHECK-LABEL: @scalarsHoisting +; CHECK: fsub +; CHECK: fmul +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @scalarsHoisting(float %d, float %min, float %max, float %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %sub = fsub float %min, %a + %mul = fmul float %sub, %div + %sub1 = fsub float %max, %a + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + %sub3 = fsub float %max, %a + %mul4 = fmul float %sub3, %div + %sub5 = fsub float %min, %a + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that all loads and scalars depending on the loads are hoisted. +; Check that getelementptr computation gets hoisted before the load. +; +; CHECK-LABEL: @readsAndScalarsHoisting +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @readsAndScalarsHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %A = getelementptr float, float* %min, i32 1 + %0 = load float, float* %A, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %B = getelementptr float, float* %min, i32 1 + %5 = load float, float* %B, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do not hoist loads after a store: the first two loads will be +; hoisted, and then the third load will not be hoisted. 
+; +; CHECK-LABEL: @readsAndWrites +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: store +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + store float %0, float* @GlobalVar + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do hoist loads when the store is above the insertion point. +; +; CHECK-LABEL: @readsAndWriteAboveInsertPt +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fsub +; CHECK: fmul +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @readsAndWriteAboveInsertPt(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + store float 0.000000e+00, float* @GlobalVar + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that dependent expressions are hoisted. +; CHECK-LABEL: @dependentScalarsHoisting +; CHECK: fsub +; CHECK: fadd +; CHECK: fdiv +; CHECK: fmul +; CHECK-NOT: fsub +; CHECK-NOT: fadd +; CHECK-NOT: fdiv +; CHECK-NOT: fmul +define float @dependentScalarsHoisting(float %a, float %b, i1 %c) { +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %d = fsub float %b, %a + %e = fadd float %d, %a + %f = fdiv float %e, %a + %g = fmul float %f, %a + br label %if.end + +if.else: + %h = fsub float %b, %a + %i = fadd float %h, %a + %j = fdiv float %i, %a + %k = fmul float %j, %a + br label %if.end + +if.end: + %r = phi float [ %g, %if.then ], [ %k, %if.else ] + ret float %r +} + +; Check that all independent expressions are hoisted. 
+; CHECK-LABEL: @independentScalarsHoisting +; CHECK: fmul +; CHECK: fadd +; CHECK: fdiv +; CHECK: fsub +; CHECK-NOT: fsub +; CHECK-NOT: fdiv +; CHECK-NOT: fmul +define float @independentScalarsHoisting(float %a, float %b, i1 %c) { +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %d = fadd float %b, %a + %e = fsub float %b, %a + %f = fdiv float %b, %a + %g = fmul float %b, %a + br label %if.end + +if.else: + %i = fadd float %b, %a + %h = fsub float %b, %a + %j = fdiv float %b, %a + %k = fmul float %b, %a + br label %if.end + +if.end: + %p = phi float [ %d, %if.then ], [ %i, %if.else ] + %q = phi float [ %e, %if.then ], [ %h, %if.else ] + %r = phi float [ %f, %if.then ], [ %j, %if.else ] + %s = phi float [ %g, %if.then ], [ %k, %if.else ] + %t = fadd float %p, %q + %u = fadd float %r, %s + %v = fadd float %t, %u + ret float %v +} + +; Check that we hoist load and scalar expressions in triangles. +; CHECK-LABEL: @triangleHoisting +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fsub +; CHECK: fmul +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @triangleHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.end: ; preds = %entry + %p1 = phi float [ %mul2, %if.then ], [ 0.000000e+00, %entry ] + %p2 = phi float [ %mul, %if.then ], [ 0.000000e+00, %entry ] + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + + %x = fadd float %p1, %mul6 + %y = fadd float %p2, %mul4 + %z = fadd float %x, %y + ret float %z +} + +; Check that we hoist load and scalar expressions in dominator. +; CHECK-LABEL: @dominatorHoisting +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @dominatorHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %entry + %p1 = phi float [ %mul4, %if.then ], [ 0.000000e+00, %entry ] + %p2 = phi float [ %mul6, %if.then ], [ 0.000000e+00, %entry ] + + %x = fadd float %p1, %mul2 + %y = fadd float %p2, %mul + %z = fadd float %x, %y + ret float %z +} + +; Check that we hoist load and scalar expressions in dominator. 
+; CHECK-LABEL: @domHoisting +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @domHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.else: + %6 = load float, float* %max, align 4 + %7 = load float, float* %a, align 4 + %sub9 = fsub float %6, %7 + %mul10 = fmul float %sub9, %div + %8 = load float, float* %min, align 4 + %sub12 = fsub float %8, %7 + %mul13 = fmul float %sub12, %div + br label %if.end + +if.end: + %p1 = phi float [ %mul4, %if.then ], [ %mul10, %if.else ] + %p2 = phi float [ %mul6, %if.then ], [ %mul13, %if.else ] + + %x = fadd float %p1, %mul2 + %y = fadd float %p2, %mul + %z = fadd float %x, %y + ret float %z +} + +; Check that we do not hoist loads past stores within a same basic block. +; CHECK-LABEL: @noHoistInSingleBBWithStore +; CHECK: load +; CHECK: store +; CHECK: load +; CHECK: store +define i32 @noHoistInSingleBBWithStore() { +entry: + %D = alloca i32, align 4 + %0 = bitcast i32* %D to i8* + %bf = load i8, i8* %0, align 4 + %bf.clear = and i8 %bf, -3 + store i8 %bf.clear, i8* %0, align 4 + %bf1 = load i8, i8* %0, align 4 + %bf.clear1 = and i8 %bf1, 1 + store i8 %bf.clear1, i8* %0, align 4 + ret i32 0 +} + +; Check that we do not hoist loads past calls within a same basic block. +; CHECK-LABEL: @noHoistInSingleBBWithCall +; CHECK: load +; CHECK: call +; CHECK: load +declare void @foo() +define i32 @noHoistInSingleBBWithCall() { +entry: + %D = alloca i32, align 4 + %0 = bitcast i32* %D to i8* + %bf = load i8, i8* %0, align 4 + %bf.clear = and i8 %bf, -3 + call void @foo() + %bf1 = load i8, i8* %0, align 4 + %bf.clear1 = and i8 %bf1, 1 + ret i32 0 +} + +; Check that we do not hoist loads past stores in any branch of a diamond. +; CHECK-LABEL: @noHoistInDiamondWithOneStore1 +; CHECK: fdiv +; CHECK: fcmp +; CHECK: br +define float @noHoistInDiamondWithOneStore1(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store float 0.000000e+00, float* @GlobalVar + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + ; There are no side effects on the if.else branch. 
+ %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + + %6 = load float, float* %max, align 4 + %7 = load float, float* %a, align 4 + %sub6 = fsub float %6, %7 + %mul7 = fmul float %sub6, %div + %8 = load float, float* %min, align 4 + %sub8 = fsub float %8, %7 + %mul9 = fmul float %sub8, %div + + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do not hoist loads past stores from half diamond. +; CHECK-LABEL: @noHoistInHalfDiamondPastStore +; CHECK: load +; CHECK-NEXT: load +; CHECK-NEXT: store +; CHECK-NEXT: br +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: br +define float @noHoistInHalfDiamondPastStore(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + + ; Loads should not be hoisted above this store. + store float 0.000000e+00, float* @GlobalVar + + br i1 %cmp, label %if.then, label %if.end + +if.then: + ; There are no side effects on the if.then branch. + %2 = load float, float* %max, align 4 + %3 = load float, float* %a, align 4 + %sub3 = fsub float %2, %3 + %mul4 = fmul float %sub3, %div + %4 = load float, float* %min, align 4 + %sub5 = fsub float %4, %3 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: + %tmax.0 = phi float [ %mul4, %if.then ], [ %0, %entry ] + %tmin.0 = phi float [ %mul6, %if.then ], [ %1, %entry ] + + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do not hoist loads past a store in any branch of a diamond. +; CHECK-LABEL: @noHoistInDiamondWithOneStore2 +; CHECK: fdiv +; CHECK: fcmp +; CHECK: br +define float @noHoistInDiamondWithOneStore2(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + ; There are no side effects on the if.then branch. + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + store float 0.000000e+00, float* @GlobalVar + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + + %6 = load float, float* %max, align 4 + %7 = load float, float* %a, align 4 + %sub6 = fsub float %6, %7 + %mul7 = fmul float %sub6, %div + %8 = load float, float* %min, align 4 + %sub8 = fsub float %8, %7 + %mul9 = fmul float %sub8, %div + + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do not hoist loads outside a loop containing stores. 
+; CHECK-LABEL: @noHoistInLoopsWithStores +; CHECK: fdiv +; CHECK: fcmp +; CHECK: br +define float @noHoistInLoopsWithStores(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %do.body, label %if.else + +do.body: + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + + ; It is unsafe to hoist the loads outside the loop because of the store. + store float 0.000000e+00, float* @GlobalVar + + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %while.cond + +while.cond: + %cmp1 = fcmp oge float %mul2, 0.000000e+00 + br i1 %cmp1, label %if.end, label %do.body + +if.else: + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: + %tmax.0 = phi float [ %mul2, %while.cond ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %while.cond ], [ %mul4, %if.else ] + + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we hoist stores: all the instructions from the then branch +; should be hoisted. +; CHECK-LABEL: @hoistStores +; CHECK: zext +; CHECK: trunc +; CHECK: getelementptr +; CHECK: load +; CHECK: getelementptr +; CHECK: store +; CHECK: load +; CHECK: load +; CHECK: zext +; CHECK: add +; CHECK: store +; CHECK: br +; CHECK: if.then +; CHECK: br + +%struct.foo = type { i16* } + +define void @hoistStores(%struct.foo* %s, i32* %coord, i1 zeroext %delta) { +entry: + %frombool = zext i1 %delta to i8 + %tobool = trunc i8 %frombool to i1 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + %p = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0 + %0 = load i16*, i16** %p, align 8 + %incdec.ptr = getelementptr inbounds i16, i16* %0, i32 1 + store i16* %incdec.ptr, i16** %p, align 8 + %1 = load i16, i16* %0, align 2 + %conv = zext i16 %1 to i32 + %2 = load i32, i32* %coord, align 4 + %add = add i32 %2, %conv + store i32 %add, i32* %coord, align 4 + br label %if.end + +if.else: ; preds = %entry + %p1 = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0 + %3 = load i16*, i16** %p1, align 8 + %incdec.ptr2 = getelementptr inbounds i16, i16* %3, i32 1 + store i16* %incdec.ptr2, i16** %p1, align 8 + %4 = load i16, i16* %3, align 2 + %conv3 = zext i16 %4 to i32 + %5 = load i32, i32* %coord, align 4 + %add4 = add i32 %5, %conv3 + store i32 %add4, i32* %coord, align 4 + %6 = load i16*, i16** %p1, align 8 + %incdec.ptr6 = getelementptr inbounds i16, i16* %6, i32 1 + store i16* %incdec.ptr6, i16** %p1, align 8 + %7 = load i16, i16* %6, align 2 + %conv7 = zext i16 %7 to i32 + %shl = shl i32 %conv7, 8 + %8 = load i32, i32* %coord, align 4 + %add8 = add i32 %8, %shl + store i32 %add8, i32* %coord, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} |
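A usage note, not part of the commit: the pass is exposed as -gvn-hoist under the legacy pass manager (which is what the RUN lines in the tests above exercise), and the PassRegistry.def hunk registers it under the same name with the new pass manager, so both invocations below should run it; the second assumes opt's -passes pipeline syntax.

  opt -gvn-hoist -S < llvm/test/Transforms/GVN/hoist.ll
  opt -passes=gvn-hoist -S < llvm/test/Transforms/GVN/hoist.ll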
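For out-of-tree users of the legacy pass manager, a minimal sketch of scheduling the pass explicitly, assuming only what this patch declares (createGVNHoistPass in llvm/Transforms/Scalar.h) and mirroring the populateFunctionPassManager hunk above; addGVNHoist is a hypothetical helper:

  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/Transforms/Scalar.h"

  // Add GVNHoist to a function pass manager. As in PassManagerBuilder, it is
  // scheduled right after EarlyCSE so that redundancies surviving CSE are
  // visible to the hoister.
  static void addGVNHoist(llvm::legacy::FunctionPassManager &FPM) {
    FPM.add(llvm::createEarlyCSEPass());
    FPM.add(llvm::createGVNHoistPass());
  }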