diff options
| author | Chris Lattner <sabre@nondot.org> | 2010-07-05 05:48:41 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2010-07-05 05:48:41 +0000 |
| commit | 7b909ac785a333d8a3dd3cbe49f321fcd47e1d51 (patch) | |
| tree | 5388a5cea519fd9fe2201b7167840124399ab834 /llvm/lib/Target | |
| parent | 2c0315a0f31a3ee15d855b788fb0fcdc6831f47d (diff) | |
| download | bcm5719-llvm-7b909ac785a333d8a3dd3cbe49f321fcd47e1d51.tar.gz bcm5719-llvm-7b909ac785a333d8a3dd3cbe49f321fcd47e1d51.zip | |
some notes about suboptimal insertps's
llvm-svn: 107613
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/README-SSE.txt | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/README-SSE.txt b/llvm/lib/Target/X86/README-SSE.txt
index d761bde646f..2a8506fd4f8 100644
--- a/llvm/lib/Target/X86/README-SSE.txt
+++ b/llvm/lib/Target/X86/README-SSE.txt
@@ -846,3 +846,34 @@ This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and
 doing a shuffle from v[1] to v[0] then a float store.
 
 //===---------------------------------------------------------------------===//
+
+On SSE4 machines, we compile this code:
+
+define <2 x float> @test2(<2 x float> %Q, <2 x float> %R,
+                          <2 x float> *%P) nounwind {
+  %Z = fadd <2 x float> %Q, %R
+
+  store <2 x float> %Z, <2 x float> *%P
+  ret <2 x float> %Z
+}
+
+into:
+
+_test2:                                 ## @test2
+## BB#0:
+        insertps $0, %xmm2, %xmm2
+        insertps $16, %xmm3, %xmm2
+        insertps $0, %xmm0, %xmm3
+        insertps $16, %xmm1, %xmm3
+        addps %xmm2, %xmm3
+        movq %xmm3, (%rdi)
+        movaps %xmm3, %xmm0
+        pshufd $1, %xmm3, %xmm1
+        ## kill: XMM1<def> XMM1<kill>
+        ret
+
+The insertps's of $0 are pointless complex copies.
+
+//===---------------------------------------------------------------------===//
+
+

