Diffstat (limited to 'llvm/lib')
 -rw-r--r--   llvm/lib/CodeGen/CodeGenPrepare.cpp   53
1 file changed, 45 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 7fc018556e1..0df2971b104 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1760,6 +1760,18 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
     }
   }
 
+  // If we have a cold call site, try to sink addressing computation into the
+  // cold block.  This interacts with our handling for loads and stores to
+  // ensure that we can fold all uses of a potential addressing computation
+  // into their uses.  TODO: generalize this to work over profiling data
+  if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+    for (auto &Arg : CI->arg_operands()) {
+      if (!Arg->getType()->isPointerTy())
+        continue;
+      unsigned AS = Arg->getType()->getPointerAddressSpace();
+      return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+    }
+
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II) {
     switch (II->getIntrinsicID()) {
@@ -3443,6 +3455,8 @@ static bool FindAllMemoryUses(
   if (!MightBeFoldableInst(I))
     return true;
 
+  const bool OptSize = I->getFunction()->optForSize();
+
   // Loop over all the uses, recursively processing them.
   for (Use &U : I->uses()) {
     Instruction *UserI = cast<Instruction>(U.getUser());
@@ -3460,6 +3474,11 @@ static bool FindAllMemoryUses(
     }
 
     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+      // If this is a cold call, we can sink the addressing calculation into
+      // the cold path.  See optimizeCallInst.
+      if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+        continue;
+
       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
       if (!IA) return true;
 
@@ -3551,10 +3570,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
   if (!BaseReg && !ScaledReg)
     return true;
 
-  // If all uses of this instruction are ultimately load/store/inlineasm's,
-  // check to see if their addressing modes will include this instruction.  If
-  // so, we can fold it into all uses, so it doesn't matter if it has multiple
-  // uses.
+  // If all uses of this instruction can have the address mode sunk into them,
+  // we can remove the addressing mode and effectively trade one live register
+  // for another (at worst).  In this context, folding an addressing mode into
+  // the use is just a particularly nice way of sinking it.
   SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
   SmallPtrSet<Instruction*, 16> ConsideredInsts;
   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
@@ -3562,8 +3581,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
 
   // Now that we know that all uses of this instruction are part of a chain of
   // computation involving only operations that could theoretically be folded
-  // into a memory use, loop over each of these uses and see if they could
-  // *actually* fold the instruction.
+  // into a memory use, loop over each of these memory operation uses and see
+  // if they could *actually* fold the instruction.  The assumption is that
+  // addressing modes are cheap and that duplicating the computation involved
+  // many times is worthwhile, even on a fastpath.  For sinking candidates
+  // (i.e. cold call sites), this serves as a way to prevent excessive code
+  // growth since most architectures have some reasonably small and fast way
+  // to compute an effective address (i.e. LEA on x86).
   SmallVector<Instruction*, 32> MatchedAddrModeInsts;
   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
     Instruction *User = MemoryUses[i].first;
@@ -3617,6 +3641,11 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
   return false;
 }
 
+/// Sink addressing mode computation immediately before MemoryInst if doing so
+/// can be done without increasing register pressure.  The need for the
+/// register pressure constraint means this can end up being an all or nothing
+/// decision for all uses of the same addressing computation.
+///
 /// Load and Store Instructions often have addressing modes that can do
 /// significant amounts of computation. As such, instruction selection will try
 /// to get the load or store to do as much computation as possible for the
@@ -3624,7 +3653,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
 /// such, we sink as much legal addressing mode work into the block as possible.
 ///
 /// This method is used to optimize both load/store and inline asms with memory
-/// operands.
+/// operands.  It's also used to sink addressing computations feeding into cold
+/// call sites into their (cold) basic block.
+///
+/// The motivation for handling sinking into cold blocks is that doing so can
+/// both enable other address mode sinking (by satisfying the register pressure
+/// constraint above), and reduce register pressure globally (by removing the
+/// addressing mode computation from the fast path entirely).
 bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                         Type *AccessTy, unsigned AddrSpace) {
   Value *Repl = Addr;
@@ -3663,7 +3698,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       continue;
     }
 
-    // For non-PHIs, determine the addressing mode being computed.
+    // For non-PHIs, determine the addressing mode being computed.  Note that
+    // the result may differ depending on what other uses our candidate
+    // addressing instructions might have.
     SmallVector<Instruction*, 16> NewAddrModeInsts;
     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
         V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
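For context, a minimal C++ sketch of the situation this patch targets (the names report_error and consume are illustrative assumptions, not taken from the patch): the address &buf[idx] is computed on the hot path, but its only use is an argument to a cold call, so after this change CodeGenPrepare may sink that addressing computation into the cold block instead of keeping it live across the fast path.

// Hypothetical example (not from the patch): 'slot' is an addressing
// computation whose only use feeds a cold call site.
__attribute__((cold)) void report_error(int *slot);  // assumed cold callee

int consume(int *buf, int idx, bool ok) {
  int *slot = &buf[idx];          // address computed on the hot path
  if (__builtin_expect(!ok, 0)) { // cold branch
    report_error(slot);           // cold call; the only use of 'slot'
    return -1;
  }
  return idx + 1;                 // fast path never needs 'slot'
}

Whether the sink actually happens still depends on the profitability checks in isProfitableToFoldIntoAddressingMode and on the function not being compiled for size (the !OptSize guard above).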

