summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-02-03 17:59:58 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-02-03 17:59:58 +0000
commit: 034c1bd32c2d201ba333b5cda9c8f873276beee9 (patch)
tree: 3e2c7233e465d1666c25d92cd12ddcad00537d07 /llvm/test/CodeGen
parent: ec9bc8ccd49fa4f89931e47e83c88939acd86db5 (diff)
download: bcm5719-llvm-034c1bd32c2d201ba333b5cda9c8f873276beee9.tar.gz
download: bcm5719-llvm-034c1bd32c2d201ba333b5cda9c8f873276beee9.zip
[X86][SSE] Add support for combining scalar_to_vector(extract_vector_elt) into a target shuffle.
Correctly flagging upper elements as undef. llvm-svn: 294020
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/X86/combine-fcopysign.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/pr29112.ll 4
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll 11
3 files changed, 6 insertions, 11 deletions
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index 807ac4e3fc6..43e09bfe5fe 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -292,7 +292,7 @@ define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x doubl
; SSE-NEXT: cvtsd2ss %xmm1, %xmm1
; SSE-NEXT: andps %xmm4, %xmm1
; SSE-NEXT: orps %xmm6, %xmm1
-; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: andps %xmm5, %xmm1
diff --git a/llvm/test/CodeGen/X86/pr29112.ll b/llvm/test/CodeGen/X86/pr29112.ll
index 8bf704835ae..de2d5d79102 100644
--- a/llvm/test/CodeGen/X86/pr29112.ll
+++ b/llvm/test/CodeGen/X86/pr29112.ll
@@ -24,11 +24,11 @@ define <4 x float> @bar(<4 x float>* %a1p, <4 x float>* %a2p, <4 x float> %a3, <
; CHECK-NEXT: vextractf32x4 $2, %zmm3, %xmm4
; CHECK-NEXT: vblendps {{.*#+}} xmm4 = xmm0[0,1,2],xmm4[3]
; CHECK-NEXT: vpermilps {{.*#+}} xmm5 = xmm2[3,1,2,3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[2,3]
+; CHECK-NEXT: vunpcklps {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1]
; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm2[1],xmm5[3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1,2],xmm3[1]
; CHECK-NEXT: vmovshdup {{.*#+}} xmm7 = xmm8[1,1,3,3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[2,3]
+; CHECK-NEXT: vunpcklps {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1]
; CHECK-NEXT: vinsertps {{.*#+}} xmm10 = xmm7[0,1],xmm2[1],xmm7[3]
; CHECK-NEXT: vblendps {{.*#+}} xmm7 = xmm10[0,1,2],xmm3[3]
; CHECK-NEXT: vblendps {{.*#+}} xmm11 = xmm0[0,1,2],xmm3[3]
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index eef7d5a1366..241c63c6acd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -342,23 +342,18 @@ define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
ret void
}
-; FIXME: Failed to fold to vpermil2ps
define void @buildvector_v4f32_07z6(float %a, <4 x float> %b, <4 x float>* %ptr) {
; X32-LABEL: buildvector_v4f32_07z6:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; X32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: vunpcklps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],zero,xmm0[2]
+; X32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm1[0],xmm0[3],zero,xmm0[2]
; X32-NEXT: vmovaps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: buildvector_v4f32_07z6:
; X64: # BB#0:
-; X64-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
-; X64-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm1[2]
+; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[3],zero,xmm1[2]
; X64-NEXT: vmovaps %xmm0, (%rdi)
; X64-NEXT: retq
%b2 = extractelement <4 x float> %b, i32 2
OpenPOWER on IntegriCloud