Drop ISD::MEMSET, ISD::MEMMOVE, and ISD::MEMCPY, which are not Legal on any current target and aren't optimized in DAGCombiner. Instead of using intermediate nodes, expand the operations, choosing between simple loads/stores, target-specific code, and library calls, immediately.

Previously, the code to emit optimized code for these operations was only used at initial SelectionDAG construction time; now it is used at all times. This fixes some cases where rep;movs was being used for small copies where simple loads/stores would be better.

This also cleans up code that checks for alignments less than 4; let the targets make that decision instead of doing it in target-independent code. This allows x86 to use rep;movs in low-alignment cases.

Also, this fixes a bug that resulted in the use of rep;stos for memsets of 0 with non-constant memory size when the alignment was at least 4. It's better to use the library in this case, which can be significantly faster when the size is large.

This also preserves more SourceValue information when memory intrinsics are lowered into simple loads/stores.

llvm-svn: 49572
author: Dan Gohman <gohman@apple.com> 2008-04-12 04:36:06 +0000
committer: Dan Gohman <gohman@apple.com> 2008-04-12 04:36:06 +0000
commit: 544ab2c50ba1acb803e57519ebf7ec81c3340f79 (patch)
tree: 53c0609d21cde14b6d7c7bd5e809f3b3a4060d25 /llvm/test/CodeGen
parent: 8c7cf88f7ea574d5c3831e0c50655e5ab60af85d (diff)
download: bcm5719-llvm-544ab2c50ba1acb803e57519ebf7ec81c3340f79.tar.gz
bcm5719-llvm-544ab2c50ba1acb803e57519ebf7ec81c3340f79.zip
8 files changed, 67 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/X86/2004-02-12-Memcpy.llx b/llvm/test/CodeGen/X86/2004-02-12-Memcpy.llx
index 151c5a5e849..59364c1f6d6 100644
--- a/llvm/test/CodeGen/X86/2004-02-12-Memcpy.llx
+++ b/llvm/test/CodeGen/X86/2004-02-12-Memcpy.llx
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep memcpy | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3
 
 @A = global [32 x i32] zeroinitializer
 @B = global [32 x i32] zeroinitializer
diff --git a/llvm/test/CodeGen/X86/byval2.ll b/llvm/test/CodeGen/X86/byval2.ll
index f438160bdaa..f85c8ffbe4f 100644
--- a/llvm/test/CodeGen/X86/byval2.ll
+++ b/llvm/test/CodeGen/X86/byval2.ll
@@ -1,7 +1,9 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
 ; RUN: llvm-as < %s | llc -march=x86    | grep rep.movsl | count 2
 
-%struct.s = type { i64, i64, i64 }
+%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
+                   i64, i64, i64, i64, i64, i64, i64, i64,
+                   i64 }
 
 define void @g(i64 %a, i64 %b, i64 %c) {
 entry:
diff --git a/llvm/test/CodeGen/X86/byval3.ll b/llvm/test/CodeGen/X86/byval3.ll
index b3794eccb46..074bab4c0a9 100644
--- a/llvm/test/CodeGen/X86/byval3.ll
+++ b/llvm/test/CodeGen/X86/byval3.ll
@@ -1,7 +1,11 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
 
-%struct.s = type { i32, i32, i32, i32, i32, i32 }
+%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32 }
 
 define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) {
 entry:
diff --git a/llvm/test/CodeGen/X86/byval4.ll b/llvm/test/CodeGen/X86/byval4.ll
index 591749f768e..d2fa9e289e7 100644
--- a/llvm/test/CodeGen/X86/byval4.ll
+++ b/llvm/test/CodeGen/X86/byval4.ll
@@ -1,7 +1,15 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl	 | count 2
 
-%struct.s = type { i16, i16, i16, i16, i16, i16 }
+%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16 }
 
 
 define void @g(i16 signext  %a1, i16 signext  %a2, i16 signext  %a3,
diff --git a/llvm/test/CodeGen/X86/byval5.ll b/llvm/test/CodeGen/X86/byval5.ll
index 4965d166666..fd9c197bbfd 100644
--- a/llvm/test/CodeGen/X86/byval5.ll
+++ b/llvm/test/CodeGen/X86/byval5.ll
@@ -1,7 +1,23 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl	 | count 2
 
-%struct.s = type { i8, i8, i8, i8, i8, i8 }
+%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8 }
 
 
 define void @g(i8 signext  %a1, i8 signext  %a2, i8 signext  %a3,
diff --git a/llvm/test/CodeGen/X86/byval7.ll b/llvm/test/CodeGen/X86/byval7.ll
index 4199bf062e7..fcbc59b838a 100644
--- a/llvm/test/CodeGen/X86/byval7.ll
+++ b/llvm/test/CodeGen/X86/byval7.ll
@@ -1,6 +1,7 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep add | grep 16
 
-	%struct.S = type { <2 x i64> }
+	%struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
+                           <2 x i64> }
 
 define i32 @main() nounwind  {
 entry:
diff --git a/llvm/test/CodeGen/X86/small-byval-memcpy.ll b/llvm/test/CodeGen/X86/small-byval-memcpy.ll
new file mode 100644
index 00000000000..dedd948c297
--- /dev/null
+++ b/llvm/test/CodeGen/X86/small-byval-memcpy.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llc | not grep movs
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval align 4  %z) nounwind  {
+entry:
+	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
+	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
+	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
+	%tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
+	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
+	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
+	%tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
+	%tmp7 = load x86_fp80* %tmp6, align 16		; <x86_fp80> [#uses=1]
+	store x86_fp80 %tmp3, x86_fp80* %real, align 16
+	store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16
+	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval align 4  %iz ) nounwind 
+	ret void
+}
+
+declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind 
diff --git a/llvm/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/llvm/test/CodeGen/X86/variable-sized-darwin-bzero.ll
new file mode 100644
index 00000000000..b0cdf496d5f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
+
+declare void @llvm.memset.i64(i8*, i8, i64, i32)
+
+define void @foo(i8* %p, i64 %n) {
+  call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
+  ret void
+}
author	Dan Gohman <gohman@apple.com>	2008-04-12 04:36:06 +0000
committer	Dan Gohman <gohman@apple.com>	2008-04-12 04:36:06 +0000
commit	544ab2c50ba1acb803e57519ebf7ec81c3340f79 (patch)
tree	53c0609d21cde14b6d7c7bd5e809f3b3a4060d25 /llvm/test/CodeGen
parent	8c7cf88f7ea574d5c3831e0c50655e5ab60af85d (diff)
download	bcm5719-llvm-544ab2c50ba1acb803e57519ebf7ec81c3340f79.tar.gz bcm5719-llvm-544ab2c50ba1acb803e57519ebf7ec81c3340f79.zip