Diffstat (limited to 'llvm/lib/Transforms/IPO/ArgumentPromotion.cpp')
-rw-r--r-- | llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 1108
1 file changed, 547 insertions(+), 561 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 65b7bad3b1e..61057a6cb08 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -61,570 +61,9 @@ STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
 STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
 STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
-namespace {
-  /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
-  ///
-  struct ArgPromotion : public CallGraphSCCPass {
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AssumptionCacheTracker>();
-      AU.addRequired<TargetLibraryInfoWrapperPass>();
-      getAAResultsAnalysisUsage(AU);
-      CallGraphSCCPass::getAnalysisUsage(AU);
-    }
-
-    bool runOnSCC(CallGraphSCC &SCC) override;
-    static char ID; // Pass identification, replacement for typeid
-    explicit ArgPromotion(unsigned maxElements = 3)
-        : CallGraphSCCPass(ID), maxElements(maxElements) {
-      initializeArgPromotionPass(*PassRegistry::getPassRegistry());
-    }
-
-  private:
-
-    using llvm::Pass::doInitialization;
-    bool doInitialization(CallGraph &CG) override;
-    /// The maximum number of elements to expand, or 0 for unlimited.
-    unsigned maxElements;
-  };
-}
-
 /// A vector used to hold the indices of a single GEP instruction
 typedef std::vector<uint64_t> IndicesVector;
 
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
-                 function_ref<AAResults &(Function &F)> AARGetter,
-                 unsigned MaxElements);
-static bool isDenselyPacked(Type *type, const DataLayout &DL);
-static bool canPaddingBeAccessed(Argument *Arg);
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
-                                    unsigned MaxElements);
-static CallGraphNode *
-DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
-            SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG);
-
-char ArgPromotion::ID = 0;
-INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
-                      "Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
-                    "Promote 'by reference' arguments to scalars", false, false)
-
-Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
-  return new ArgPromotion(maxElements);
-}
-
-static bool runImpl(CallGraphSCC &SCC, CallGraph &CG,
-                    function_ref<AAResults &(Function &F)> AARGetter,
-                    unsigned MaxElements) {
-  bool Changed = false, LocalChange;
-
-  do { // Iterate until we stop promoting from this SCC.
-    LocalChange = false;
-    // Attempt to promote arguments from all functions in this SCC.
-    for (CallGraphNode *OldNode : SCC) {
-      if (CallGraphNode *NewNode =
-              PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
-        LocalChange = true;
-        SCC.ReplaceNode(OldNode, NewNode);
-      }
-    }
-    Changed |= LocalChange; // Remember that we changed something.
-  } while (LocalChange);
-
-  return Changed;
-}
-
-bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
-  if (skipSCC(SCC))
-    return false;
-
-  // Get the callgraph information that we need to update to reflect our
-  // changes.
-  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
-
-  // We compute dedicated AA results for each function in the SCC as needed. We
-  // use a lambda referencing external objects so that they live long enough to
-  // be queried, but we re-use them each time.
-  Optional<BasicAAResult> BAR;
-  Optional<AAResults> AAR;
-  auto AARGetter = [&](Function &F) -> AAResults & {
-    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
-    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
-    return *AAR;
-  };
-
-  return runImpl(SCC, CG, AARGetter, maxElements);
-}
-
-/// \brief Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
-
-  // There is no size information, so be conservative.
-  if (!type->isSized())
-    return false;
-
-  // If the alloc size is not equal to the storage size, then there are padding
-  // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
-  if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
-    return false;
-
-  if (!isa<CompositeType>(type))
-    return true;
-
-  // For homogenous sequential types, check for padding within members.
-  if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
-    return isDenselyPacked(seqTy->getElementType(), DL);
-
-  // Check for padding within and between elements of a struct.
-  StructType *StructTy = cast<StructType>(type);
-  const StructLayout *Layout = DL.getStructLayout(StructTy);
-  uint64_t StartPos = 0;
-  for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
-    Type *ElTy = StructTy->getElementType(i);
-    if (!isDenselyPacked(ElTy, DL))
-      return false;
-    if (StartPos != Layout->getElementOffsetInBits(i))
-      return false;
-    StartPos += DL.getTypeAllocSizeInBits(ElTy);
-  }
-
-  return true;
-}
-
-/// \brief Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *arg) {
-
-  assert(arg->hasByValAttr());
-
-  // Track all the pointers to the argument to make sure they are not captured.
-  SmallPtrSet<Value *, 16> PtrValues;
-  PtrValues.insert(arg);
-
-  // Track all of the stores.
-  SmallVector<StoreInst *, 16> Stores;
-
-  // Scan through the uses recursively to make sure the pointer is always used
-  // sanely.
-  SmallVector<Value *, 16> WorkList;
-  WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
-  while (!WorkList.empty()) {
-    Value *V = WorkList.back();
-    WorkList.pop_back();
-    if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
-      if (PtrValues.insert(V).second)
-        WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
-    } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
-      Stores.push_back(Store);
-    } else if (!isa<LoadInst>(V)) {
-      return true;
-    }
-  }
-
-// Check to make sure the pointers aren't captured
-  for (StoreInst *Store : Stores)
-    if (PtrValues.count(Store->getValueOperand()))
-      return true;
-
-  return false;
-}
-
-/// PromoteArguments - This method checks the specified function to see if there
-/// are any promotable arguments and if it is safe to promote the function (for
-/// example, all callers are direct). If safe to promote some arguments, it
-/// calls the DoPromotion method.
-///
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
-                 function_ref<AAResults &(Function &F)> AARGetter,
-                 unsigned MaxElements) {
-  Function *F = CGN->getFunction();
-
-  // Make sure that it is local to this module.
-  if (!F || !F->hasLocalLinkage()) return nullptr;
-
-  // Don't promote arguments for variadic functions. Adding, removing, or
-  // changing non-pack parameters can change the classification of pack
-  // parameters. Frontends encode that classification at the call site in the
-  // IR, while in the callee the classification is determined dynamically based
-  // on the number of registers consumed so far.
-  if (F->isVarArg()) return nullptr;
-
-  // First check: see if there are any pointer arguments! If not, quick exit.
-  SmallVector<Argument*, 16> PointerArgs;
-  for (Argument &I : F->args())
-    if (I.getType()->isPointerTy())
-      PointerArgs.push_back(&I);
-  if (PointerArgs.empty()) return nullptr;
-
-  // Second check: make sure that all callers are direct callers. We can't
-  // transform functions that have indirect callers. Also see if the function
-  // is self-recursive.
-  bool isSelfRecursive = false;
-  for (Use &U : F->uses()) {
-    CallSite CS(U.getUser());
-    // Must be a direct call.
-    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
-
-    if (CS.getInstruction()->getParent()->getParent() == F)
-      isSelfRecursive = true;
-  }
-
-  const DataLayout &DL = F->getParent()->getDataLayout();
-
-  AAResults &AAR = AARGetter(*F);
-
-  // Check to see which arguments are promotable. If an argument is promotable,
-  // add it to ArgsToPromote.
-  SmallPtrSet<Argument*, 8> ArgsToPromote;
-  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
-  for (Argument *PtrArg : PointerArgs) {
-    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
-
-    // Replace sret attribute with noalias. This reduces register pressure by
-    // avoiding a register copy.
-    if (PtrArg->hasStructRetAttr()) {
-      unsigned ArgNo = PtrArg->getArgNo();
-      F->setAttributes(
-          F->getAttributes()
-              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
-              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
-      for (Use &U : F->uses()) {
-        CallSite CS(U.getUser());
-        CS.setAttributes(
-            CS.getAttributes()
-                .removeAttribute(F->getContext(), ArgNo + 1,
-                                 Attribute::StructRet)
-                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
-      }
-    }
-
-    // If this is a byval argument, and if the aggregate type is small, just
-    // pass the elements, which is always safe, if the passed value is densely
-    // packed or if we can prove the padding bytes are never accessed. This does
-    // not apply to inalloca.
-    bool isSafeToPromote =
-        PtrArg->hasByValAttr() &&
-        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
-    if (isSafeToPromote) {
-      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
-        if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
-          DEBUG(dbgs() << "argpromotion disable promoting argument '"
-                << PtrArg->getName() << "' because it would require adding more"
-                << " than " << MaxElements << " arguments to the function.\n");
-          continue;
-        }
-
-        // If all the elements are single-value types, we can promote it.
-        bool AllSimple = true;
-        for (const auto *EltTy : STy->elements()) {
-          if (!EltTy->isSingleValueType()) {
-            AllSimple = false;
-            break;
-          }
-        }
-
-        // Safe to transform, don't even bother trying to "promote" it.
-        // Passing the elements as a scalar will allow sroa to hack on
-        // the new alloca we introduce.
-        if (AllSimple) {
-          ByValArgsToTransform.insert(PtrArg);
-          continue;
-        }
-      }
-    }
-
-    // If the argument is a recursive type and we're in a recursive
-    // function, we could end up infinitely peeling the function argument.
-    if (isSelfRecursive) {
-      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
-        bool RecursiveType = false;
-        for (const auto *EltTy : STy->elements()) {
-          if (EltTy == PtrArg->getType()) {
-            RecursiveType = true;
-            break;
-          }
-        }
-        if (RecursiveType)
-          continue;
-      }
-    }
-
-    // Otherwise, see if we can promote the pointer to its value.
-    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
-                                MaxElements))
-      ArgsToPromote.insert(PtrArg);
-  }
-
-  // No promotable pointer arguments.
-  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
-    return nullptr;
-
-  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
-}
-
-/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
-/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
-  Function *Callee = Arg->getParent();
-  const DataLayout &DL = Callee->getParent()->getDataLayout();
-
-  unsigned ArgNo = Arg->getArgNo();
-
-  // Look at all call sites of the function. At this point we know we only have
-  // direct callees.
-  for (User *U : Callee->users()) {
-    CallSite CS(U);
-    assert(CS && "Should only have direct calls!");
-
-    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
-      return false;
-  }
-  return true;
-}
-
-/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
-/// that is greater than or equal to the size of prefix, and each of the
-/// elements in Prefix is the same as the corresponding elements in Longer.
-///
-/// This means it also returns true when Prefix and Longer are equal!
-static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
-  if (Prefix.size() > Longer.size())
-    return false;
-  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
-}
-
-
-/// Checks if Indices, or a prefix of Indices, is in Set.
-static bool PrefixIn(const IndicesVector &Indices,
-                     std::set<IndicesVector> &Set) {
-  std::set<IndicesVector>::iterator Low;
-  Low = Set.upper_bound(Indices);
-  if (Low != Set.begin())
-    Low--;
-  // Low is now the last element smaller than or equal to Indices. This means
-  // it points to a prefix of Indices (possibly Indices itself), if such
-  // prefix exists.
-  //
-  // This load is safe if any prefix of its operands is safe to load.
-  return Low != Set.end() && IsPrefix(*Low, Indices);
-}
-
-/// Mark the given indices (ToMark) as safe in the given set of indices
-/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
-/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
-/// already. Furthermore, any indices that Indices is itself a prefix of, are
-/// removed from Safe (since they are implicitely safe because of Indices now).
-static void MarkIndicesSafe(const IndicesVector &ToMark,
-                            std::set<IndicesVector> &Safe) {
-  std::set<IndicesVector>::iterator Low;
-  Low = Safe.upper_bound(ToMark);
-  // Guard against the case where Safe is empty
-  if (Low != Safe.begin())
-    Low--;
-  // Low is now the last element smaller than or equal to Indices. This
-  // means it points to a prefix of Indices (possibly Indices itself), if
-  // such prefix exists.
-  if (Low != Safe.end()) {
-    if (IsPrefix(*Low, ToMark))
-      // If there is already a prefix of these indices (or exactly these
-      // indices) marked a safe, don't bother adding these indices
-      return;
-
-    // Increment Low, so we can use it as a "insert before" hint
-    ++Low;
-  }
-  // Insert
-  Low = Safe.insert(Low, ToMark);
-  ++Low;
-  // If there we're a prefix of longer index list(s), remove those
-  std::set<IndicesVector>::iterator End = Safe.end();
-  while (Low != End && IsPrefix(ToMark, *Low)) {
-    std::set<IndicesVector>::iterator Remove = Low;
-    ++Low;
-    Safe.erase(Remove);
-  }
-}
-
-/// isSafeToPromoteArgument - As you might guess from the name of this method,
-/// it checks to see if it is both safe and useful to promote the argument.
-/// This method limits promotion of aggregates to only promote up to three
-/// elements of the aggregate in order to avoid exploding the number of
-/// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
-                                    AAResults &AAR, unsigned MaxElements) {
-  typedef std::set<IndicesVector> GEPIndicesSet;
-
-  // Quick exit for unused arguments
-  if (Arg->use_empty())
-    return true;
-
-  // We can only promote this argument if all of the uses are loads, or are GEP
-  // instructions (with constant indices) that are subsequently loaded.
-  //
-  // Promoting the argument causes it to be loaded in the caller
-  // unconditionally. This is only safe if we can prove that either the load
-  // would have happened in the callee anyway (ie, there is a load in the entry
-  // block) or the pointer passed in at every call site is guaranteed to be
-  // valid.
-  // In the former case, invalid loads can happen, but would have happened
-  // anyway, in the latter case, invalid loads won't happen. This prevents us
-  // from introducing an invalid load that wouldn't have happened in the
-  // original code.
-  //
-  // This set will contain all sets of indices that are loaded in the entry
-  // block, and thus are safe to unconditionally load in the caller.
-  //
-  // This optimization is also safe for InAlloca parameters, because it verifies
-  // that the address isn't captured.
-  GEPIndicesSet SafeToUnconditionallyLoad;
-
-  // This set contains all the sets of indices that we are planning to promote.
-  // This makes it possible to limit the number of arguments added.
-  GEPIndicesSet ToPromote;
-
-  // If the pointer is always valid, any load with first index 0 is valid.
-  if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
-    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
-
-  // First, iterate the entry block and mark loads of (geps of) arguments as
-  // safe.
-  BasicBlock &EntryBlock = Arg->getParent()->front();
-  // Declare this here so we can reuse it
-  IndicesVector Indices;
-  for (Instruction &I : EntryBlock)
-    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
-      Value *V = LI->getPointerOperand();
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
-        V = GEP->getPointerOperand();
-        if (V == Arg) {
-          // This load actually loads (part of) Arg? Check the indices then.
-          Indices.reserve(GEP->getNumIndices());
-          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
-               II != IE; ++II)
-            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
-              Indices.push_back(CI->getSExtValue());
-            else
-              // We found a non-constant GEP index for this argument? Bail out
-              // right away, can't promote this argument at all.
-              return false;
-
-          // Indices checked out, mark them as safe
-          MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
-          Indices.clear();
-        }
-      } else if (V == Arg) {
-        // Direct loads are equivalent to a GEP with a single 0 index.
-        MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
-      }
-    }
-
-  // Now, iterate all uses of the argument to see if there are any uses that are
-  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
-  SmallVector<LoadInst*, 16> Loads;
-  IndicesVector Operands;
-  for (Use &U : Arg->uses()) {
-    User *UR = U.getUser();
-    Operands.clear();
-    if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
-      // Don't hack volatile/atomic loads
-      if (!LI->isSimple()) return false;
-      Loads.push_back(LI);
-      // Direct loads are equivalent to a GEP with a zero index and then a load.
-      Operands.push_back(0);
-    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
-      if (GEP->use_empty()) {
-        // Dead GEP's cause trouble later. Just remove them if we run into
-        // them.
-        GEP->eraseFromParent();
-        // TODO: This runs the above loop over and over again for dead GEPs
-        // Couldn't we just do increment the UI iterator earlier and erase the
-        // use?
-        return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
-                                       MaxElements);
-      }
-
-      // Ensure that all of the indices are constants.
-      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
-           i != e; ++i)
-        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
-          Operands.push_back(C->getSExtValue());
-        else
-          return false; // Not a constant operand GEP!
-
-      // Ensure that the only users of the GEP are load instructions.
-      for (User *GEPU : GEP->users())
-        if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
-          // Don't hack volatile/atomic loads
-          if (!LI->isSimple()) return false;
-          Loads.push_back(LI);
-        } else {
-          // Other uses than load?
-          return false;
-        }
-    } else {
-      return false; // Not a load or a GEP.
-    }
-
-    // Now, see if it is safe to promote this load / loads of this GEP. Loading
-    // is safe if Operands, or a prefix of Operands, is marked as safe.
-    if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
-      return false;
-
-    // See if we are already promoting a load with these indices. If not, check
-    // to make sure that we aren't promoting too many elements. If so, nothing
-    // to do.
-    if (ToPromote.find(Operands) == ToPromote.end()) {
-      if (MaxElements > 0 && ToPromote.size() == MaxElements) {
-        DEBUG(dbgs() << "argpromotion not promoting argument '"
-              << Arg->getName() << "' because it would require adding more "
-              << "than " << MaxElements << " arguments to the function.\n");
-        // We limit aggregate promotion to only promoting up to a fixed number
-        // of elements of the aggregate.
-        return false;
-      }
-      ToPromote.insert(std::move(Operands));
-    }
-  }
-
-  if (Loads.empty()) return true; // No users, this is a dead argument.
-
-  // Okay, now we know that the argument is only used by load instructions and
-  // it is safe to unconditionally perform all of them. Use alias analysis to
-  // check to see if the pointer is guaranteed to not be modified from entry of
-  // the function to each of the load instructions.
-
-  // Because there could be several/many load instructions, remember which
-  // blocks we know to be transparent to the load.
-  df_iterator_default_set<BasicBlock*, 16> TranspBlocks;
-
-  for (LoadInst *Load : Loads) {
-    // Check to see if the load is invalidated from the start of the block to
-    // the load itself.
-    BasicBlock *BB = Load->getParent();
-
-    MemoryLocation Loc = MemoryLocation::get(Load);
-    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
-      return false; // Pointer is invalidated!
-
-    // Now check every path from the entry block to the load for transparency.
-    // To do this, we perform a depth first search on the inverse CFG from the
-    // loading block.
-    for (BasicBlock *P : predecessors(BB)) {
-      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
-        if (AAR.canBasicBlockModify(*TranspBB, Loc))
-          return false;
-    }
-  }
-
-  // If the path from the entry of the function to each load is free of
-  // instructions that potentially invalidate the load, we can make the
-  // transformation!
-  return true;
-}
-
 /// DoPromotion - This method actually performs the promotion of the specified
 /// arguments, and returns the new function. At this point, we know that it's
 /// safe to do so.
@@ -1037,6 +476,553 @@ DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
   return NF_CGN;
 }
 
+
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
+/// all callees pass in a valid pointer for the specified function argument.
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+  Function *Callee = Arg->getParent();
+  const DataLayout &DL = Callee->getParent()->getDataLayout();
+
+  unsigned ArgNo = Arg->getArgNo();
+
+  // Look at all call sites of the function. At this point we know we only have
+  // direct callees.
+  for (User *U : Callee->users()) {
+    CallSite CS(U);
+    assert(CS && "Should only have direct calls!");
+
+    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+      return false;
+  }
+  return true;
+}
+
+/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
+/// that is greater than or equal to the size of prefix, and each of the
+/// elements in Prefix is the same as the corresponding elements in Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
+  if (Prefix.size() > Longer.size())
+    return false;
+  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
+}
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const IndicesVector &Indices,
+                     std::set<IndicesVector> &Set) {
+  std::set<IndicesVector>::iterator Low;
+  Low = Set.upper_bound(Indices);
+  if (Low != Set.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to Indices. This means
+  // it points to a prefix of Indices (possibly Indices itself), if such
+  // prefix exists.
+  //
+  // This load is safe if any prefix of its operands is safe to load.
+  return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
+/// already. Furthermore, any indices that Indices is itself a prefix of, are
+/// removed from Safe (since they are implicitely safe because of Indices now).
+static void MarkIndicesSafe(const IndicesVector &ToMark,
+                            std::set<IndicesVector> &Safe) {
+  std::set<IndicesVector>::iterator Low;
+  Low = Safe.upper_bound(ToMark);
+  // Guard against the case where Safe is empty
+  if (Low != Safe.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to Indices. This
+  // means it points to a prefix of Indices (possibly Indices itself), if
+  // such prefix exists.
+  if (Low != Safe.end()) {
+    if (IsPrefix(*Low, ToMark))
+      // If there is already a prefix of these indices (or exactly these
+      // indices) marked a safe, don't bother adding these indices
+      return;
+
+    // Increment Low, so we can use it as a "insert before" hint
+    ++Low;
+  }
+  // Insert
+  Low = Safe.insert(Low, ToMark);
+  ++Low;
+  // If there we're a prefix of longer index list(s), remove those
+  std::set<IndicesVector>::iterator End = Safe.end();
+  while (Low != End && IsPrefix(ToMark, *Low)) {
+    std::set<IndicesVector>::iterator Remove = Low;
+    ++Low;
+    Safe.erase(Remove);
+  }
+}
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to only promote up to three
+/// elements of the aggregate in order to avoid exploding the number of
+/// arguments passed in.
+static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
+                                    AAResults &AAR, unsigned MaxElements) {
+  typedef std::set<IndicesVector> GEPIndicesSet;
+
+  // Quick exit for unused arguments
+  if (Arg->use_empty())
+    return true;
+
+  // We can only promote this argument if all of the uses are loads, or are GEP
+  // instructions (with constant indices) that are subsequently loaded.
+  //
+  // Promoting the argument causes it to be loaded in the caller
+  // unconditionally. This is only safe if we can prove that either the load
+  // would have happened in the callee anyway (ie, there is a load in the entry
+  // block) or the pointer passed in at every call site is guaranteed to be
+  // valid.
+  // In the former case, invalid loads can happen, but would have happened
+  // anyway, in the latter case, invalid loads won't happen. This prevents us
+  // from introducing an invalid load that wouldn't have happened in the
+  // original code.
+  //
+  // This set will contain all sets of indices that are loaded in the entry
+  // block, and thus are safe to unconditionally load in the caller.
+  //
+  // This optimization is also safe for InAlloca parameters, because it verifies
+  // that the address isn't captured.
+  GEPIndicesSet SafeToUnconditionallyLoad;
+
+  // This set contains all the sets of indices that we are planning to promote.
+  // This makes it possible to limit the number of arguments added.
+  GEPIndicesSet ToPromote;
+
+  // If the pointer is always valid, any load with first index 0 is valid.
+  if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+  // First, iterate the entry block and mark loads of (geps of) arguments as
+  // safe.
+  BasicBlock &EntryBlock = Arg->getParent()->front();
+  // Declare this here so we can reuse it
+  IndicesVector Indices;
+  for (Instruction &I : EntryBlock)
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+      Value *V = LI->getPointerOperand();
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+        V = GEP->getPointerOperand();
+        if (V == Arg) {
+          // This load actually loads (part of) Arg? Check the indices then.
+          Indices.reserve(GEP->getNumIndices());
+          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+               II != IE; ++II)
+            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+              Indices.push_back(CI->getSExtValue());
+            else
+              // We found a non-constant GEP index for this argument? Bail out
+              // right away, can't promote this argument at all.
+              return false;
+
+          // Indices checked out, mark them as safe
+          MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+          Indices.clear();
+        }
+      } else if (V == Arg) {
+        // Direct loads are equivalent to a GEP with a single 0 index.
+        MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+      }
+    }
+
+  // Now, iterate all uses of the argument to see if there are any uses that are
+  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+  SmallVector<LoadInst*, 16> Loads;
+  IndicesVector Operands;
+  for (Use &U : Arg->uses()) {
+    User *UR = U.getUser();
+    Operands.clear();
+    if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
+      // Don't hack volatile/atomic loads
+      if (!LI->isSimple()) return false;
+      Loads.push_back(LI);
+      // Direct loads are equivalent to a GEP with a zero index and then a load.
+      Operands.push_back(0);
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
+      if (GEP->use_empty()) {
+        // Dead GEP's cause trouble later. Just remove them if we run into
+        // them.
+        GEP->eraseFromParent();
+        // TODO: This runs the above loop over and over again for dead GEPs
+        // Couldn't we just do increment the UI iterator earlier and erase the
+        // use?
+        return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
+                                       MaxElements);
+      }
+
+      // Ensure that all of the indices are constants.
+      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+           i != e; ++i)
+        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+          Operands.push_back(C->getSExtValue());
+        else
+          return false; // Not a constant operand GEP!
+
+      // Ensure that the only users of the GEP are load instructions.
+      for (User *GEPU : GEP->users())
+        if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
+          // Don't hack volatile/atomic loads
+          if (!LI->isSimple()) return false;
+          Loads.push_back(LI);
+        } else {
+          // Other uses than load?
+          return false;
+        }
+    } else {
+      return false; // Not a load or a GEP.
+    }
+
+    // Now, see if it is safe to promote this load / loads of this GEP. Loading
+    // is safe if Operands, or a prefix of Operands, is marked as safe.
+    if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+      return false;
+
+    // See if we are already promoting a load with these indices. If not, check
+    // to make sure that we aren't promoting too many elements. If so, nothing
+    // to do.
+    if (ToPromote.find(Operands) == ToPromote.end()) {
+      if (MaxElements > 0 && ToPromote.size() == MaxElements) {
+        DEBUG(dbgs() << "argpromotion not promoting argument '"
+              << Arg->getName() << "' because it would require adding more "
+              << "than " << MaxElements << " arguments to the function.\n");
+        // We limit aggregate promotion to only promoting up to a fixed number
+        // of elements of the aggregate.
+        return false;
+      }
+      ToPromote.insert(std::move(Operands));
+    }
+  }
+
+  if (Loads.empty()) return true; // No users, this is a dead argument.
+
+  // Okay, now we know that the argument is only used by load instructions and
+  // it is safe to unconditionally perform all of them. Use alias analysis to
+  // check to see if the pointer is guaranteed to not be modified from entry of
+  // the function to each of the load instructions.
+
+  // Because there could be several/many load instructions, remember which
+  // blocks we know to be transparent to the load.
+  df_iterator_default_set<BasicBlock*, 16> TranspBlocks;
+
+  for (LoadInst *Load : Loads) {
+    // Check to see if the load is invalidated from the start of the block to
+    // the load itself.
+    BasicBlock *BB = Load->getParent();
+
+    MemoryLocation Loc = MemoryLocation::get(Load);
+    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
+      return false; // Pointer is invalidated!
+
+    // Now check every path from the entry block to the load for transparency.
+    // To do this, we perform a depth first search on the inverse CFG from the
+    // loading block.
+    for (BasicBlock *P : predecessors(BB)) {
+      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
+        if (AAR.canBasicBlockModify(*TranspBB, Loc))
+          return false;
+    }
+  }
+
+  // If the path from the entry of the function to each load is free of
+  // instructions that potentially invalidate the load, we can make the
+  // transformation!
+  return true;
+}
+
+
+/// \brief Checks if a type could have padding bytes.
+static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+
+  // There is no size information, so be conservative.
+  if (!type->isSized())
+    return false;
+
+  // If the alloc size is not equal to the storage size, then there are padding
+  // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+  if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+    return false;
+
+  if (!isa<CompositeType>(type))
+    return true;
+
+  // For homogenous sequential types, check for padding within members.
+  if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+    return isDenselyPacked(seqTy->getElementType(), DL);
+
+  // Check for padding within and between elements of a struct.
+  StructType *StructTy = cast<StructType>(type);
+  const StructLayout *Layout = DL.getStructLayout(StructTy);
+  uint64_t StartPos = 0;
+  for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+    Type *ElTy = StructTy->getElementType(i);
+    if (!isDenselyPacked(ElTy, DL))
+      return false;
+    if (StartPos != Layout->getElementOffsetInBits(i))
+      return false;
+    StartPos += DL.getTypeAllocSizeInBits(ElTy);
+  }
+
+  return true;
+}
+
+/// \brief Checks if the padding bytes of an argument could be accessed.
+static bool canPaddingBeAccessed(Argument *arg) {
+
+  assert(arg->hasByValAttr());
+
+  // Track all the pointers to the argument to make sure they are not captured.
+  SmallPtrSet<Value *, 16> PtrValues;
+  PtrValues.insert(arg);
+
+  // Track all of the stores.
+  SmallVector<StoreInst *, 16> Stores;
+
+  // Scan through the uses recursively to make sure the pointer is always used
+  // sanely.
+  SmallVector<Value *, 16> WorkList;
+  WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+  while (!WorkList.empty()) {
+    Value *V = WorkList.back();
+    WorkList.pop_back();
+    if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+      if (PtrValues.insert(V).second)
+        WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+    } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+      Stores.push_back(Store);
+    } else if (!isa<LoadInst>(V)) {
+      return true;
+    }
+  }
+
+// Check to make sure the pointers aren't captured
+  for (StoreInst *Store : Stores)
+    if (PtrValues.count(Store->getValueOperand()))
+      return true;
+
+  return false;
+}
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+static CallGraphNode *
+PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
+                 function_ref<AAResults &(Function &F)> AARGetter,
+                 unsigned MaxElements) {
+  Function *F = CGN->getFunction();
+
+  // Make sure that it is local to this module.
+  if (!F || !F->hasLocalLinkage()) return nullptr;
+
+  // Don't promote arguments for variadic functions. Adding, removing, or
+  // changing non-pack parameters can change the classification of pack
+  // parameters. Frontends encode that classification at the call site in the
+  // IR, while in the callee the classification is determined dynamically based
+  // on the number of registers consumed so far.
+  if (F->isVarArg()) return nullptr;
+
+  // First check: see if there are any pointer arguments! If not, quick exit.
+  SmallVector<Argument*, 16> PointerArgs;
+  for (Argument &I : F->args())
+    if (I.getType()->isPointerTy())
+      PointerArgs.push_back(&I);
+  if (PointerArgs.empty()) return nullptr;
+
+  // Second check: make sure that all callers are direct callers. We can't
+  // transform functions that have indirect callers. Also see if the function
+  // is self-recursive.
+  bool isSelfRecursive = false;
+  for (Use &U : F->uses()) {
+    CallSite CS(U.getUser());
+    // Must be a direct call.
+    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
+
+    if (CS.getInstruction()->getParent()->getParent() == F)
+      isSelfRecursive = true;
+  }
+
+  const DataLayout &DL = F->getParent()->getDataLayout();
+
+  AAResults &AAR = AARGetter(*F);
+
+  // Check to see which arguments are promotable. If an argument is promotable,
+  // add it to ArgsToPromote.
+  SmallPtrSet<Argument*, 8> ArgsToPromote;
+  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+  for (Argument *PtrArg : PointerArgs) {
+    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+
+    // Replace sret attribute with noalias. This reduces register pressure by
+    // avoiding a register copy.
+    if (PtrArg->hasStructRetAttr()) {
+      unsigned ArgNo = PtrArg->getArgNo();
+      F->setAttributes(
+          F->getAttributes()
+              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      for (Use &U : F->uses()) {
+        CallSite CS(U.getUser());
+        CS.setAttributes(
+            CS.getAttributes()
+                .removeAttribute(F->getContext(), ArgNo + 1,
+                                 Attribute::StructRet)
+                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      }
+    }
+
+    // If this is a byval argument, and if the aggregate type is small, just
+    // pass the elements, which is always safe, if the passed value is densely
+    // packed or if we can prove the padding bytes are never accessed. This does
+    // not apply to inalloca.
+    bool isSafeToPromote =
+        PtrArg->hasByValAttr() &&
+        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+    if (isSafeToPromote) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+        if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
+          DEBUG(dbgs() << "argpromotion disable promoting argument '"
+                << PtrArg->getName() << "' because it would require adding more"
+                << " than " << MaxElements << " arguments to the function.\n");
+          continue;
+        }
+
+        // If all the elements are single-value types, we can promote it.
+        bool AllSimple = true;
+        for (const auto *EltTy : STy->elements()) {
+          if (!EltTy->isSingleValueType()) {
+            AllSimple = false;
+            break;
+          }
+        }
+
+        // Safe to transform, don't even bother trying to "promote" it.
+        // Passing the elements as a scalar will allow sroa to hack on
+        // the new alloca we introduce.
+        if (AllSimple) {
+          ByValArgsToTransform.insert(PtrArg);
+          continue;
+        }
+      }
+    }
+
+    // If the argument is a recursive type and we're in a recursive
+    // function, we could end up infinitely peeling the function argument.
+    if (isSelfRecursive) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+        bool RecursiveType = false;
+        for (const auto *EltTy : STy->elements()) {
+          if (EltTy == PtrArg->getType()) {
+            RecursiveType = true;
+            break;
+          }
+        }
+        if (RecursiveType)
+          continue;
+      }
+    }
+
+    // Otherwise, see if we can promote the pointer to its value.
+    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
+                                MaxElements))
+      ArgsToPromote.insert(PtrArg);
+  }
+
+  // No promotable pointer arguments.
+  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+    return nullptr;
+
+  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
+}
+
+namespace {
+  /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+  ///
+  struct ArgPromotion : public CallGraphSCCPass {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<AssumptionCacheTracker>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
+      getAAResultsAnalysisUsage(AU);
+      CallGraphSCCPass::getAnalysisUsage(AU);
+    }
+
+    bool runOnSCC(CallGraphSCC &SCC) override;
+    static char ID; // Pass identification, replacement for typeid
+    explicit ArgPromotion(unsigned MaxElements = 3)
+        : CallGraphSCCPass(ID), MaxElements(MaxElements) {
+      initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+    }
+
+  private:
+
+    using llvm::Pass::doInitialization;
+    bool doInitialization(CallGraph &CG) override;
+    /// The maximum number of elements to expand, or 0 for unlimited.
+    unsigned MaxElements;
+  };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+                      "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+                    "Promote 'by reference' arguments to scalars", false, false)
+
+Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
+  return new ArgPromotion(MaxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+  if (skipSCC(SCC))
+    return false;
+
+  // Get the callgraph information that we need to update to reflect our
+  // changes.
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+  // We compute dedicated AA results for each function in the SCC as needed. We
+  // use a lambda referencing external objects so that they live long enough to
+  // be queried, but we re-use them each time.
+  Optional<BasicAAResult> BAR;
+  Optional<AAResults> AAR;
+  auto AARGetter = [&](Function &F) -> AAResults & {
+    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+    return *AAR;
+  };
+
+  bool Changed = false, LocalChange;
+
+  // Iterate until we stop promoting from this SCC.
+  do {
+    LocalChange = false;
+    // Attempt to promote arguments from all functions in this SCC.
+    for (CallGraphNode *OldNode : SCC) {
+      if (CallGraphNode *NewNode =
+              PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
+        LocalChange = true;
+        SCC.ReplaceNode(OldNode, NewNode);
+      }
+    }
+    // Remember that we changed something.
+    Changed |= LocalChange;
+  } while (LocalChange);
+
+  return Changed;
+}
+
 bool ArgPromotion::doInitialization(CallGraph &CG) {
   return CallGraphSCCPass::doInitialization(CG);
 }
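
The transformation this pass performs is easiest to see at the source level. The sketch below is illustrative only: the function and type names are invented, and the real pass rewrites LLVM IR, not C++ source. An internal function that only reads through a pointer argument is rewritten so the caller performs the loads and passes the values directly, which is roughly what DoPromotion produces for a promoted (or byval-transformed) argument.

struct Point { int X; int Y; };

// Before: the callee dereferences its pointer argument on every call.
static int beforePromotion(const Point *P) { return P->X + P->Y; }

// After: the loads have moved to the call site and the callee takes
// scalars, so later passes can keep the values in registers.
static int afterPromotion(int X, int Y) { return X + Y; }

int caller(const Point &P) {
  // Call site before the transformation: beforePromotion(&P);
  // Call site after: the caller performs the loads itself.
  return afterPromotion(P.X, P.Y);
}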
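The trickiest bookkeeping in the diff is the GEP-index prefix set shared by PrefixIn and MarkIndicesSafe. Below is a minimal standalone sketch of the same invariant (plain C++, not LLVM code; the lowercase helper names are ours): the set never holds both a vector and one of its prefixes, and a query succeeds when the queried vector or any prefix of it is present. Because std::set orders vectors lexicographically, the greatest element less than or equal to a query is the only candidate prefix that needs checking.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

using IndicesVector = std::vector<uint64_t>;

static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
  return Prefix.size() <= Longer.size() &&
         std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}

// Mirrors PrefixIn: the greatest element <= Indices is the only entry that
// can be a prefix of Indices, so one comparison suffices.
static bool prefixIn(const IndicesVector &Indices,
                     const std::set<IndicesVector> &Set) {
  auto Low = Set.upper_bound(Indices);
  if (Low != Set.begin())
    --Low;
  return Low != Set.end() && isPrefix(*Low, Indices);
}

// Mirrors MarkIndicesSafe: skip the insert if a prefix is already present,
// and erase any existing entries that the new entry prefixes.
static void markSafe(const IndicesVector &ToMark,
                     std::set<IndicesVector> &Safe) {
  auto Low = Safe.upper_bound(ToMark);
  if (Low != Safe.begin())
    --Low;
  if (Low != Safe.end()) {
    if (isPrefix(*Low, ToMark))
      return; // Already covered by an existing prefix.
    ++Low;    // Reuse as an insert-before hint.
  }
  Low = Safe.insert(Low, ToMark);
  ++Low;
  while (Low != Safe.end() && isPrefix(ToMark, *Low))
    Low = Safe.erase(Low); // Entries now subsumed by ToMark.
}

int main() {
  std::set<IndicesVector> Safe;
  markSafe({0, 1, 2}, Safe);
  assert(prefixIn({0, 1, 2, 5}, Safe)); // Covered by the {0,1,2} entry.
  assert(!prefixIn({0, 2}, Safe));      // No prefix of {0,2} is marked.
  markSafe({0, 1}, Safe);               // Subsumes and erases {0,1,2}.
  assert(Safe.size() == 1 && prefixIn({0, 1, 2}, Safe));
}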
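isDenselyPacked reduces to a simple invariant over DataLayout's struct layout: every element must itself be densely packed and must start exactly where the previous element's allocation ended. Here is a self-contained model of the struct case, using our own toy FieldLayout type rather than the LLVM API:

#include <cstdint>
#include <vector>

struct FieldLayout {
  uint64_t OffsetInBits;    // Where the field starts, per the data layout.
  uint64_t AllocSizeInBits; // Storage the field occupies, incl. tail padding.
};

// A struct is densely packed when no gap appears before any field.
static bool isDenselyPackedStruct(const std::vector<FieldLayout> &Fields) {
  uint64_t ExpectedOffset = 0;
  for (const FieldLayout &F : Fields) {
    if (F.OffsetInBits != ExpectedOffset)
      return false; // A gap before this field means padding bytes.
    ExpectedOffset += F.AllocSizeInBits;
  }
  return true;
}

int main() {
  // { i32, i64 } with natural alignment: the i64 starts at bit 64 but the
  // i32's allocation ends at bit 32, so there is padding -> not dense.
  std::vector<FieldLayout> Padded = {{0, 32}, {64, 64}};
  // { i32, i32 }: no gaps -> densely packed.
  std::vector<FieldLayout> Packed = {{0, 32}, {32, 32}};
  return isDenselyPackedStruct(Padded) || !isDenselyPackedStruct(Packed);
}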