summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2005-01-17 17:55:19 +0000
committer: Chris Lattner <sabre@nondot.org> 2005-01-17 17:55:19 +0000
commit: e3c2cf4854dcb2a2a1a751bccd2264b8335dceff (patch)
tree: 5563ed5e881738a234e615cc4ceac5f609e2f3de
parent: 9098879472d1af18e17ea0886f43ddfd95c73e72 (diff)
downloadbcm5719-llvm-e3c2cf4854dcb2a2a1a751bccd2264b8335dceff.tar.gz
bcm5719-llvm-e3c2cf4854dcb2a2a1a751bccd2264b8335dceff.zip
Implement a target-independent optimization to codegen arguments only into
the basic block that uses them if possible. This is a big win on X86, as it
lets us fold the argument loads into instructions and reduce register pressure
(by not loading all of the arguments in the entry block).

For this (contrived to show the optimization) testcase:

    int %argtest(int %A, int %B) {
            %X = sub int 12345, %A
            br label %L
    L:
            %Y = add int %X, %B
            ret int %Y
    }

we used to produce:

    argtest:
            mov %ECX, DWORD PTR [%ESP + 4]
            mov %EAX, 12345
            sub %EAX, %ECX
            mov %EDX, DWORD PTR [%ESP + 8]
    .LBBargtest_1:  # L
            add %EAX, %EDX
            ret

now we produce:

    argtest:
            mov %EAX, 12345
            sub %EAX, DWORD PTR [%ESP + 4]
    .LBBargtest_1:  # L
            add %EAX, DWORD PTR [%ESP + 8]
            ret

This also fixes the FIXME in the code.

BTW, this occurs in real code. 164.gzip shrinks from 8623 to 8608 lines of
.s file. The stack frame in huft_build shrinks from 1644->1628 bytes,
inflate_codes shrinks from 116->108 bytes, and inflate_block from 2620->2612,
due to fewer spills.

Take that alkis. :-)

llvm-svn: 19639
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp81
1 files changed, 69 insertions, 12 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 64940aa6b6c..d13030f847a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -68,6 +68,14 @@ namespace llvm {
/// anywhere in the function.
std::map<const AllocaInst*, int> StaticAllocaMap;
+ /// BlockLocalArguments - If any arguments are only used in a single basic
+ /// block, and if the target can access the arguments without side-effects,
+ /// avoid emitting CopyToReg nodes for those arguments. This map keeps
+ /// track of which arguments are local to each BB.
+ std::multimap<BasicBlock*, std::pair<Argument*,
+ unsigned> > BlockLocalArguments;
+
+
unsigned MakeReg(MVT::ValueType VT) {
return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
}
@@ -806,28 +814,77 @@ CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) {
return DAG.getCopyToReg(DAG.getRoot(), Op, Reg);
}
+/// IsOnlyUsedInOneBasicBlock - Return the block holding every user of A, or
+/// null if A is unused, is used by a PHI node, or has users in several blocks.
+static BasicBlock *IsOnlyUsedInOneBasicBlock(Argument *A) {
+ if (A->use_empty()) return 0;
+ BasicBlock *BB = cast<Instruction>(A->use_back())->getParent();
+ for (Argument::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E;
+ ++UI)
+ if (isa<PHINode>(*UI) || cast<Instruction>(*UI)->getParent() != BB)
+ return 0; // A PHI user, or a user outside BB: not block-local.
+ return BB;
+}
+
void SelectionDAGISel::
LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL,
std::vector<SDOperand> &UnorderedChains) {
// If this is the entry block, emit arguments.
Function &F = *BB->getParent();
+ FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
if (BB == &F.front()) {
- // FIXME: If an argument is only used in one basic block, we could directly
- // emit it (ONLY) into that block, not emitting the COPY_TO_VREG node. This
- // would improve codegen in several cases on X86 by allowing the loads to be
- // folded into the user operation.
+ SDOperand OldRoot = SDL.DAG.getRoot();
+
std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
- FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
+ // If lowering the arguments changed the DAG root (i.e. accessing them had
+ // side effects), copy every used argument to its virtual register here.
+ if (OldRoot != SDL.DAG.getRoot()) {
+ unsigned a = 0;
+ for (Function::aiterator AI = F.abegin(), E = F.aend(); AI != E; ++AI,++a)
+ if (!AI->use_empty()) {
+ SDL.setValue(AI, Args[a]);
+ SDOperand Copy =
+ CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
+ UnorderedChains.push_back(Copy);
+ }
+ } else {
+ // Otherwise, record each argument whose users all sit in one basic block
+ // so it is emitted only there; all other used arguments are copied here.
+ unsigned a = 0;
+ for (Function::aiterator AI = F.abegin(), E = F.aend(); AI != E; ++AI,++a)
+ if (!AI->use_empty()) {
+ if (BasicBlock *BBU = IsOnlyUsedInOneBasicBlock(AI)) {
+ FuncInfo.BlockLocalArguments.insert(std::make_pair(BBU,
+ std::make_pair(AI, a)));
+ } else {
+ SDL.setValue(AI, Args[a]);
+ SDOperand Copy =
+ CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
+ UnorderedChains.push_back(Copy);
+ }
+ }
+ }
+ }
- unsigned a = 0;
- for (Function::aiterator AI = F.abegin(), E = F.aend(); AI != E; ++AI,++a)
- if (!AI->use_empty()) {
- SDL.setValue(AI, Args[a]);
- UnorderedChains.push_back(
- CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]));
- }
+ // See if any block-local arguments were recorded for this block (BB); this
+ // check is not restricted to the entry block.
+
+ if (!FuncInfo.BlockLocalArguments.empty()) {
+ std::multimap<BasicBlock*, std::pair<Argument*, unsigned> >::iterator BLAI =
+ FuncInfo.BlockLocalArguments.lower_bound(BB);
+ if (BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB) {
+ // Lower the full argument list into SDL.DAG for this block.
+ std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
+
+ // Set up the value mapping for the local arguments.
+ for (; BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB;
+ ++BLAI)
+ SDL.setValue(BLAI->second.first, Args[BLAI->second.second]);
+
+ // Lowered arguments that are not local to this block are just not mapped.
+ }
}
}
OpenPOWER on IntegriCloud