author     Owen Anderson <resistor@mac.com>    2008-02-25 04:08:09 +0000
committer  Owen Anderson <resistor@mac.com>    2008-02-25 04:08:09 +0000
commit     df1d2b02f9ac1a00a5a00d7f7ea617b6ac841e34 (patch)
tree       a1a4cfad1c55ab5b92fbd7ba5ae1f76038f13d93
parent     cb0d62c70d33324cc8e1c90d844165d5196094d8 (diff)
Fix an issue where GVN was performing the return slot optimization when it
was not safe. This is fixed by more aggressively checking that the return
slot is not used elsewhere in the function.

llvm-svn: 47544
-rw-r--r--  llvm/lib/Transforms/Scalar/GVN.cpp                        34
-rw-r--r--  llvm/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll  32
2 files changed, 58 insertions, 8 deletions
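To see the hazard the commit message describes, here is a minimal C++ analogue
(all names are illustrative, not taken from the commit). A callee writes its
result through a caller-provided return slot; if that slot has a second use,
redirecting the call to write the copy's destination directly leaves the second
use reading memory the call never filled in:

    struct Pair { long double first, second; };

    void initialize(Pair* slot);   // stands in for an sret callee
    void consume(const Pair* p);

    void caller() {
      Pair slot;                   // the return slot
      Pair copy;
      initialize(&slot);           // the call fills the slot
      copy = slot;                 // the copy GVN wants to fuse into the call
      consume(&slot);              // second use of the slot: rewriting the
                                   // call to initialize(&copy) and deleting
                                   // the copy would make this a read of
                                   // uninitialized memory
    }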
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 650612144ad..4b7e82c927e 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -752,6 +752,8 @@ namespace {
bool iterateOnFunction(Function &F);
Value* CollapsePhi(PHINode* p);
bool isSafeReplacement(PHINode* p, Instruction* inst);
+ bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+ Instruction* cutoff);
};
char GVN::ID = 0;
@@ -1055,22 +1057,32 @@ bool GVN::processLoad(LoadInst* L,
return deletedLoad;
}
-/// isReturnSlotOptznProfitable - Determine if performing a return slot
-/// fusion with the slot dest is profitable
-static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
- // We currently consider it profitable if dest is otherwise dead.
- SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+/// valueHasOnlyOneUseAfter - Returns true if the value has only one use
+/// after the cutoff, that use lies in the same block as the cutoff, and
+/// it is the same as the use parameter.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+ Instruction* cutoff) {
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+
+ SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
while (!useList.empty()) {
User* UI = useList.back();
+
if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
useList.pop_back();
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
I != E; ++I)
useList.push_back(*I);
- } else if (UI == cpy)
+ } else if (UI == use) {
useList.pop_back();
- else
+ } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+ if (inst->getParent() == use->getParent() &&
+ (inst == cutoff || !DT.dominates(cutoff, inst))) {
+ useList.pop_back();
+ } else
+ return false;
+ } else
return false;
}
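For intuition, here is a self-contained sketch of the same worklist walk over a
toy use graph. This is not the LLVM API; Node, transparent, and position are
invented for illustration, and only the single-block case is modeled, whereas
the code above also rejects any use outside the memcpy's block:

    #include <vector>

    struct Node {
      bool transparent = false;     // models GEP/bitcast: look through it
      int position = 0;             // program order within the block
      std::vector<Node*> users;
    };

    bool onlyOneUseAfter(Node* val, Node* allowed, int cutoffPos) {
      std::vector<Node*> worklist(val->users);
      while (!worklist.empty()) {
        Node* u = worklist.back();
        worklist.pop_back();
        if (u->transparent)
          // Address-only wrapper: its users count as users of the value.
          worklist.insert(worklist.end(), u->users.begin(), u->users.end());
        else if (u == allowed)
          continue;                 // the one permitted use (the memcpy)
        else if (u->position <= cutoffPos)
          continue;                 // at or before the cutoff (the call)
        else
          return false;             // another use after the call: unsafe
      }
      return true;
    }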
@@ -1123,8 +1135,14 @@ bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
return false;
+  // For safety, we must ensure that the only use of the call's output
+  // parameter is the memcpy; otherwise this transformation is invalid.
+ if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+ return false;
+
// We only perform the transformation if it will be profitable.
- if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+ if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
return false;
// In addition to knowing that the call does not access the return slot
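The second check guards the destination side of the fusion. A sketch in the
same illustrative style (names invented, not from the commit): if the
destination is observed between the call and the memcpy, making the call write
it directly changes what the observer sees:

    struct Pair { long double first, second; };

    void initialize(Pair* slot);
    long double read_first(const Pair* p);

    void caller() {
      Pair slot;
      Pair dest = {1.0L, 2.0L};
      initialize(&slot);
      long double old = read_first(&dest); // must observe dest's old value
      dest = slot;                         // fusing this copy into the call
                                           // (initialize(&dest)) would clobber
                                           // dest before the read above runs
      (void)old;
    }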
diff --git a/llvm/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll b/llvm/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll
new file mode 100644
index 00000000000..797dba2b698
--- /dev/null
+++ b/llvm/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll
@@ -0,0 +1,32 @@
+; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*initialize} | grep memtmp | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
+entry:
+ %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+ %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+ ret void
+}
+
+declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+
+define fastcc void @badly_optimized() nounwind {
+entry:
+ %z = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+ %tmp = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+ %memtmp = alloca { x86_fp80, x86_fp80 }, align 8 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+ call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp )
+ %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
+ %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
+ %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8* ; <i8*> [#uses=1]
+ %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
+ %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z ) ; <x86_fp80> [#uses=0]
+ ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
\ No newline at end of file
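The RUN line asserts that after -gvn -dse exactly one line matches both
"call.*initialize" and "memtmp", i.e. the call still writes %memtmp and the
unsafe return slot fusion was not performed; before this fix, GVN fused the
copies in a way that could leave the value reaching @passed_uninitialized
uninitialized, hence the helper's name.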