summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
author: Nadav Rotem <nrotem@apple.com>, 2013-01-18 23:10:30 +0000
committer: Nadav Rotem <nrotem@apple.com>, 2013-01-18 23:10:30 +0000
commit7431211214d54d0cd8cc0d069447abd22c5da0cb (patch)
tree74da55584f564ef5a3c8e07f1ea4df7c9e7c6c4a /llvm/test/CodeGen/X86
parent2affc1ea6d27dbd9258cef614725f92c7d2770b3 (diff)
downloadbcm5719-llvm-7431211214d54d0cd8cc0d069447abd22c5da0cb.tar.gz
bcm5719-llvm-7431211214d54d0cd8cc0d069447abd22c5da0cb.zip
On Sandy Bridge, loading an unaligned 256-bit value using two XMM loads (vmovups and vinsertf128) is faster than using a single vmovups instruction.
llvm-svn: 172868
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- llvm/test/CodeGen/X86/sandybridge-loads.ll | 21
-rw-r--r-- llvm/test/CodeGen/X86/v8i1-masks.ll | 2
2 files changed, 22 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/X86/sandybridge-loads.ll b/llvm/test/CodeGen/X86/sandybridge-loads.ll
new file mode 100644
index 00000000000..d85c32eaa7e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sandybridge-loads.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: wideloads
+;CHECK: vmovaps
+;CHECK: vinsertf128
+;CHECK: vmovups
+;CHECK-NOT: vinsertf128
+;CHECK: ret
+
+define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 16 ; 32-byte vector, only 16-byte aligned (under-aligned for its width)
+ %v1 = load <8 x float>* %b, align 32 ; 32-byte vector, fully 32-byte aligned
+ %m0 = fcmp olt <8 x float> %v1, %v0
+ %v2 = load <8 x float>* %c, align 16 ; 32-byte vector, only 16-byte aligned, same as %v0
+ %m1 = fcmp olt <8 x float> %v2, %v0
+ %mand = and <8 x i1> %m1, %m0
+ %r = zext <8 x i1> %mand to <8 x i32>
+ store <8 x i32> %r, <8 x i32>* undef, align 16 ; NOTE(review): store to undef presumably just keeps %r live -- confirm
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll
index abb4b39bd62..ea231aff5b6 100644
--- a/llvm/test/CodeGen/X86/v8i1-masks.ll
+++ b/llvm/test/CodeGen/X86/v8i1-masks.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
;CHECK: and_masks
-;CHECK: vmovups
+;CHECK: vmovaps
;CHECK: vcmpltp
;CHECK: vcmpltp
;CHECK: vandps
OpenPOWER on IntegriCloud