diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-24 18:07:53 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-24 18:07:53 +0000 |
commit | 941bd6bbae6ba1fbf3d46cfb365d1015703ef448 (patch) | |
tree | 8eddc2a47b1117c80aff16e57288bb09d505d194 /llvm/test/CodeGen/X86/vector-shuffle-combining.ll | |
parent | 26d9c41ff6b863253e1e9b17bd5acffc30ead692 (diff) | |
download | bcm5719-llvm-941bd6bbae6ba1fbf3d46cfb365d1015703ef448.tar.gz bcm5719-llvm-941bd6bbae6ba1fbf3d46cfb365d1015703ef448.zip |
[X86][SSE] Add support for combining VZEXT_MOVL target shuffles
Includes adding more general support for the pattern: VZEXT_MOVL(VZEXT_LOAD(ptr)) -> VZEXT_LOAD(ptr)
This has unearthed a couple of latent poor codegen issues (MINSS/MAXSS scalar load folding and MOVDDUP/BROADCAST load folding patterns), which will be fixed shortly.
It has also reduced a couple of tests so that they no longer reach the instruction threshold necessary to be combined into PSHUFB (see PR26183).
llvm-svn: 279646
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-combining.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining.ll | 29 |
1 files changed, 5 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 6e8fc5ef194..12ce9a2b1df 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -2814,31 +2814,12 @@ define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) { ret <4 x float> %d } -; FIXME: Failed to recognise that the VMOVSD has already zero'd the upper element define void @combine_scalar_load_with_blend_with_zero(double* %a0, <4 x float>* %a1) { -; SSE2-LABEL: combine_scalar_load_with_blend_with_zero: -; SSE2: # BB#0: -; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] -; SSE2-NEXT: movaps %xmm0, (%rsi) -; SSE2-NEXT: retq -; -; SSSE3-LABEL: combine_scalar_load_with_blend_with_zero: -; SSSE3: # BB#0: -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSSE3-NEXT: xorps %xmm1, %xmm1 -; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] -; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] -; SSSE3-NEXT: movaps %xmm0, (%rsi) -; SSSE3-NEXT: retq -; -; SSE41-LABEL: combine_scalar_load_with_blend_with_zero: -; SSE41: # BB#0: -; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE41-NEXT: movapd %xmm0, (%rsi) -; SSE41-NEXT: retq +; SSE-LABEL: combine_scalar_load_with_blend_with_zero: +; SSE: # BB#0: +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: movapd %xmm0, (%rsi) +; SSE-NEXT: retq ; ; AVX-LABEL: combine_scalar_load_with_blend_with_zero: ; AVX: # BB#0: |