summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/CodeGen/CGCall.cpp
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2010-06-27 01:06:27 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-06-27 01:06:27 +0000
commit: 3fcc790cd8da55bc18603535854039a20fb5eba4 (patch)
tree: c846d6b22e2c6e8afd5022ceadbe45e252ec69cf /clang/lib/CodeGen/CGCall.cpp
parent: 0875802d0f52a03d0deeb6bc5df5e32196142e13 (diff)
download: bcm5719-llvm-3fcc790cd8da55bc18603535854039a20fb5eba4.tar.gz
bcm5719-llvm-3fcc790cd8da55bc18603535854039a20fb5eba4.zip
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:

  int foo(int X) {
    int A[100];
    return A[X];
  }

we used to generate:

  %arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
  %tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
  store i32 %tmp1, i32* %retval
  %0 = load i32* %retval ; <i32> [#uses=1]
  ret i32 %0
  }

which codegen'd to this code:

  _foo: ## @foo
  ## BB#0: ## %entry
    subq $408, %rsp ## imm = 0x198
    movl %edi, 400(%rsp)
    movl 400(%rsp), %edi
    movslq %edi, %rax
    movl (%rsp,%rax,4), %edi
    movl %edi, 404(%rsp)
    movl 404(%rsp), %eax
    addq $408, %rsp ## imm = 0x198
    ret

Now we generate:

  %arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
  %tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
  ret i32 %tmp1
  }

and:

  _foo: ## @foo
  ## BB#0: ## %entry
    subq $408, %rsp ## imm = 0x198
    movl %edi, 404(%rsp)
    movl 404(%rsp), %edi
    movslq %edi, %rax
    movl (%rsp,%rax,4), %eax
    addq $408, %rsp ## imm = 0x198
    ret

This actually does matter, cutting out 2000 lines of IR from CGStmt.ll for
example.

Another interesting effect is that altivec.h functions which are dead now get
dce'd by the inliner. Hence all the changes to builtins-ppc-altivec.c to
ensure the calls aren't dead.

llvm-svn: 106970
Diffstat (limited to 'clang/lib/CodeGen/CGCall.cpp')
-rw-r--r-- clang/lib/CodeGen/CGCall.cpp 55
1 files changed, 37 insertions, 18 deletions
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 283f3610af1..051ef086990 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -373,18 +373,18 @@ static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
// FIXME: Use better alignment / avoid requiring aligned load.
Load->setAlignment(1);
return Load;
- } else {
- // Otherwise do coercion through memory. This is stupid, but
- // simple.
- llvm::Value *Tmp = CGF.CreateTempAlloca(Ty);
- llvm::Value *Casted =
- CGF.Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(SrcTy));
- llvm::StoreInst *Store =
- CGF.Builder.CreateStore(CGF.Builder.CreateLoad(SrcPtr), Casted);
- // FIXME: Use better alignment / avoid requiring aligned store.
- Store->setAlignment(1);
- return CGF.Builder.CreateLoad(Tmp);
}
+
+ // Otherwise do coercion through memory. This is stupid, but
+ // simple.
+ llvm::Value *Tmp = CGF.CreateTempAlloca(Ty);
+ llvm::Value *Casted =
+ CGF.Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(SrcTy));
+ llvm::StoreInst *Store =
+ CGF.Builder.CreateStore(CGF.Builder.CreateLoad(SrcPtr), Casted);
+ // FIXME: Use better alignment / avoid requiring aligned store.
+ Store->setAlignment(1);
+ return CGF.Builder.CreateLoad(Tmp);
}
/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
@@ -798,8 +798,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(AI == Fn->arg_end() && "Argument mismatch!");
}
-void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
- llvm::Value *ReturnValue) {
+void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) {
// Functions with no result always return void.
if (ReturnValue == 0) {
Builder.CreateRetVoid();
@@ -824,12 +823,32 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
break;
case ABIArgInfo::Extend:
- case ABIArgInfo::Direct:
- // The internal return value temp always will have
- // pointer-to-return-type type.
- RV = Builder.CreateLoad(ReturnValue);
+ case ABIArgInfo::Direct: {
+ // The internal return value temp always will have pointer-to-return-type
+ // type, just do a load.
+
+ // If the instruction right before the insertion point is a store to the
+ // return value, we can elide the load, zap the store, and usually zap the
+ // alloca.
+ llvm::BasicBlock *InsertBB = Builder.GetInsertBlock();
+ llvm::StoreInst *SI = 0;
+ if (InsertBB->empty() ||
+ !(SI = dyn_cast<llvm::StoreInst>(&InsertBB->back())) ||
+ SI->getPointerOperand() != ReturnValue || SI->isVolatile()) {
+ RV = Builder.CreateLoad(ReturnValue);
+ } else {
+ // Get the stored value and nuke the now-dead store.
+ RV = SI->getValueOperand();
+ SI->eraseFromParent();
+
+ // If that was the only use of the return value, nuke it as well now.
+ if (ReturnValue->use_empty() && isa<llvm::AllocaInst>(ReturnValue)) {
+ cast<llvm::AllocaInst>(ReturnValue)->eraseFromParent();
+ ReturnValue = 0;
+ }
+ }
break;
-
+ }
case ABIArgInfo::Ignore:
break;
OpenPOWER on IntegriCloud