diff options
| author | Chris Lattner <sabre@nondot.org> | 2010-08-28 17:59:08 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2010-08-28 17:59:08 +0000 |
| commit | 94656b1c8c60994a8bcc6c9b03981a78f0bba3a7 (patch) | |
| tree | e3bc5c3c8b9500f9af2d1cc290a4cc10d59e2003 /llvm/test | |
| parent | bcb6090ad0660f018a8365248d5467047f423e39 (diff) | |
| download | bcm5719-llvm-94656b1c8c60994a8bcc6c9b03981a78f0bba3a7.tar.gz bcm5719-llvm-94656b1c8c60994a8bcc6c9b03981a78f0bba3a7.zip | |
Fix the buildvector->insertp[sd] logic so that it no longer always creates a
redundant insertp[sd] $0, which is a no-op. Before:
_f32: ## @f32
pshufd $1, %xmm1, %xmm2
pshufd $1, %xmm0, %xmm3
addss %xmm2, %xmm3
addss %xmm1, %xmm0
## kill: XMM0<def> XMM0<kill> XMM0<def>
insertps $0, %xmm0, %xmm0
insertps $16, %xmm3, %xmm0
ret
After:
_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
pshufd $1, %xmm1, %xmm1
pshufd $1, %xmm0, %xmm3
addss %xmm1, %xmm3
movdqa %xmm2, %xmm0
insertps $16, %xmm3, %xmm0
ret
The extra movs are due to a random (poor) scheduling decision.
llvm-svn: 112379
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41.ll | 25 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-9.ll | 2 |
3 files changed, 27 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 4a97ac35afc..bb01e5afcef 100644
--- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll
index ef66d1a44a1..3a14fa26300 100644
--- a/llvm/test/CodeGen/X86/sse41.ll
+++ b/llvm/test/CodeGen/X86/sse41.ll
@@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
 
+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+  %tmp7 = extractelement <2 x float> %A, i32 0
+  %tmp5 = extractelement <2 x float> %A, i32 1
+  %tmp3 = extractelement <2 x float> %B, i32 0
+  %tmp1 = extractelement <2 x float> %B, i32 1
+  %add.r = fadd float %tmp7, %tmp3
+  %add.i = fadd float %tmp5, %tmp1
+  %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+  %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+  ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+
diff --git a/llvm/test/CodeGen/X86/vec_insert-9.ll b/llvm/test/CodeGen/X86/vec_insert-9.ll
index 2e829df1f8d..e5a7ccc5ef9 100644
--- a/llvm/test/CodeGen/X86/vec_insert-9.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-9.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
-; RUN: grep pinsrd %t | count 2
+; RUN: grep pinsrd %t | count 1
 
 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
 entry:

