Diffstat (limited to 'llvm/lib')
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 53 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 7fc018556e1..0df2971b104 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1760,6 +1760,18 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
}
+ // If we have a cold call site, try to sink addressing computation into the
+ // cold block. This interacts with our handling for loads and stores so that
+ // a potential addressing computation can be folded into all of its uses.
+ // TODO: generalize this to work over profiling data.
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ for (auto &Arg : CI->arg_operands()) {
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned AS = Arg->getType()->getPointerAddressSpace();
+ return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
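For context, a rough source-level sketch of what the hunk above enables; the struct, the function names, and the use of __attribute__((cold)) are invented for illustration and are not part of the patch.

    struct Buf { int header; int data[64]; };
    __attribute__((cold)) void log_slot(int *slot);   // callee carries the cold attribute

    void before(Buf *b, long i, bool err) {
      int *slot = &b->data[i];   // address computed on the hot path, live across
      if (err)                   // the branch just to feed the cold call
        log_slot(slot);
    }

    // Conceptually, sinking turns it into the equivalent of:
    void after(Buf *b, long i, bool err) {
      if (err)
        log_slot(&b->data[i]);   // computation rematerialized in the cold block
    }

The transformation itself runs on IR in CodeGenPrepare rather than on source; the pair above is only meant to show which values stop being live on the fast path.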
@@ -3443,6 +3455,8 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
+ const bool OptSize = I->getFunction()->optForSize();
+
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
Instruction *UserI = cast<Instruction>(U.getUser());
@@ -3460,6 +3474,11 @@ static bool FindAllMemoryUses(
}
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+ // If this is a cold call, we can sink the addressing calculation into
+ // the cold path. See optimizeCallInst.
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ continue;
+
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
if (!IA) return true;
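A heavily simplified sketch of the rule this use-walk now applies (this is not the real FindAllMemoryUses, just the shape of the check, restricted to direct users and ignoring the recursion and inline-asm handling of the actual routine): loads, store addresses, and, when not optimizing for size, cold calls count as users that can absorb the address computation.

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Returns true if every direct user of the address computation I could
    // either fold it (load/store address) or have it sunk to it (cold call).
    static bool allUsersCanAbsorbAddress(Instruction *I, bool OptSize) {
      for (Use &U : I->uses()) {
        auto *UserI = cast<Instruction>(U.getUser());
        if (isa<LoadInst>(UserI))
          continue;                              // folds into the load
        if (isa<StoreInst>(UserI)) {
          if (U.getOperandNo() == StoreInst::getPointerOperandIndex())
            continue;                            // folds into the store's address
          return false;                          // used as the stored value
        }
        if (auto *CI = dyn_cast<CallInst>(UserI))
          if (!OptSize && CI->hasFnAttr(Attribute::Cold))
            continue;                            // sinkable into the cold block
        return false;                            // any other user blocks it
      }
      return true;
    }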
@@ -3551,10 +3570,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
if (!BaseReg && !ScaledReg)
return true;
- // If all uses of this instruction are ultimately load/store/inlineasm's,
- // check to see if their addressing modes will include this instruction. If
- // so, we can fold it into all uses, so it doesn't matter if it has multiple
- // uses.
+ // If all uses of this instruction can have the address mode sunk into them,
+ // we can remove the addressing mode and effectively trade one live register
+ // for another (at worst). In this context, folding an addressing mode into
+ // the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
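To make the live-register trade above concrete, a small invented example (not from the patch); the x86-64 operands in the comments are only roughly what a typical compiler would emit under the SysV calling convention.

    void store_pair(int *base, long i, int a, int b) {
      int *p = &base[i];   // candidate addressing computation
      p[0] = a;            // roughly: movl %edx, (%rdi,%rsi,4)
      p[1] = b;            // roughly: movl %ecx, 4(%rdi,%rsi,4)
    }

If both stores can fold the computation, p never needs a register of its own; at worst base and i stay live a little longer, which is the one-register-for-another trade described in the comment.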
@@ -3562,8 +3581,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
- // into a memory use, loop over each of these uses and see if they could
- // *actually* fold the instruction.
+ // into a memory use, loop over each of these memory operation uses and see
+ // if they could *actually* fold the instruction. The assumption is that
+ // addressing modes are cheap and that duplicating the computation involved
+ // many times is worthwhile, even on a fast path. For sinking candidates
+ // (i.e. cold call sites), this serves as a way to prevent excessive code
+ // growth, since most architectures have some reasonably small and fast way
+ // to compute an effective address (e.g., LEA on x86).
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
Instruction *User = MemoryUses[i].first;
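The 'reasonably small and fast way to compute an effective address' claim is easy to see in isolation; in this invented example (comments show rough x86-64 output) the whole base + index*scale + displacement form either folds into the memory operand or is materialized by a single LEA.

    int *slot_addr(int *base, long i) {
      return &base[i + 2];   // roughly: leaq 8(%rdi,%rsi,4), %rax
    }

    int load_slot(int *base, long i) {
      return base[i + 2];    // roughly: movl 8(%rdi,%rsi,4), %eax
    }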
@@ -3617,6 +3641,11 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
return false;
}
+/// Sink addressing mode computation immediately before MemoryInst if doing so
+/// can be done without increasing register pressure. The need for the
+/// register pressure constraint means this can end up being an all-or-nothing
+/// decision for all uses of the same addressing computation.
+///
/// Load and Store Instructions often have addressing modes that can do
/// significant amounts of computation. As such, instruction selection will try
/// to get the load or store to do as much computation as possible for the
@@ -3624,7 +3653,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// such, we sink as much legal addressing mode work into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
-/// operands.
+/// operands. It's also used to sink addressing computations feeding into cold
+/// call sites into their (cold) basic block.
+///
+/// The motivation for sinking into cold blocks is that doing so can both
+/// enable other address mode sinking (by satisfying the register pressure
+/// constraint above) and reduce register pressure globally (by removing the
+/// addressing mode computation from the fast path entirely).
bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
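A hypothetical pattern matching the motivation described in the doc comment above (identifiers and the cold attribute placement are invented): the same address feeds hot loads and a cold diagnostic call. Before this change the call counted as a use that could not absorb the computation, so folding it into the loads was not considered profitable and the computed pointer stayed live across the fast path; treating the cold call as a sinking opportunity lets every use absorb it.

    __attribute__((cold)) void report_bad_entry(const int *entry);

    int lookup(const int *table, long idx) {
      const int *entry = &table[idx];   // shared addressing computation
      if (*entry < 0)
        report_bad_entry(entry);        // cold call site taking the pointer
      return *entry;                    // hot loads can fold table + idx*4
    }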
@@ -3663,7 +3698,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
continue;
}
- // For non-PHIs, determine the addressing mode being computed.
+ // For non-PHIs, determine the addressing mode being computed. Note that
+ // the result may differ depending on what other uses our candidate
+ // addressing instructions might have.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,