diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2014-11-28 18:40:18 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2014-11-28 18:40:18 +0000 |
| commit | e57f3c0a4235573d824e18b01799e6a8e21a6f22 (patch) | |
| tree | 520104f801c81a644436cbe2dec02b99b80b7886 /llvm/test | |
| parent | 18bc3bf830a88fc7d745a97c2e9ff5288628da5c (diff) | |
| download | bcm5719-llvm-e57f3c0a4235573d824e18b01799e6a8e21a6f22.tar.gz bcm5719-llvm-e57f3c0a4235573d824e18b01799e6a8e21a6f22.zip | |
Enable FeatureFastUAMem for btver2
Allow unaligned 16-byte memop codegen for btver2. No functional changes for any other subtargets.
Replace the existing supposed small memcpy test with an actual test of a small memcpy.
The previous test wasn't using FileCheck either.
This patch should allow us to close PR21541 ( http://llvm.org/bugs/show_bug.cgi?id=21541 ).
Differential Revision: http://reviews.llvm.org/D6360
llvm-svn: 222925
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/small-byval-memcpy.ll | 41 |
1 file changed, 23 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/X86/small-byval-memcpy.ll b/llvm/test/CodeGen/X86/small-byval-memcpy.ll
index 1b596b58989..3c03750199c 100644
--- a/llvm/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/llvm/test/CodeGen/X86/small-byval-memcpy.ll
@@ -1,20 +1,25 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movsd | count 8
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s --check-prefix=CORE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
 
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind {
-entry:
-  %iz = alloca { x86_fp80, x86_fp80 }  ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
-  %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1  ; <x86_fp80*> [#uses=1]
-  %tmp2 = load x86_fp80* %tmp1, align 16  ; <x86_fp80> [#uses=1]
-  %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2  ; <x86_fp80> [#uses=1]
-  %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1  ; <x86_fp80*> [#uses=1]
-  %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0  ; <x86_fp80*> [#uses=1]
-  %tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0  ; <x86_fp80*> [#uses=1]
-  %tmp7 = load x86_fp80* %tmp6, align 16  ; <x86_fp80> [#uses=1]
-  store x86_fp80 %tmp3, x86_fp80* %real, align 16
-  store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16
-  call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %iz ) nounwind
-  ret void
-}
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+define void @copy16bytes(i8* nocapture %a, i8* nocapture readonly %b) {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 1, i1 false)
+  ret void
+
+  ; CHECK-LABEL: copy16bytes
+  ; CORE2: movq
+  ; CORE2-NEXT: movq
+  ; CORE2-NEXT: movq
+  ; CORE2-NEXT: movq
+  ; CORE2-NEXT: retq
 
-declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind
+  ; NEHALEM: movups
+  ; NEHALEM-NEXT: movups
+  ; NEHALEM-NEXT: retq
+
+  ; BTVER2: movups
+  ; BTVER2-NEXT: movups
+  ; BTVER2-NEXT: retq
+}

