diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-01-21 22:44:35 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-01-21 22:44:35 +0000 |
| commit | b16b09b154f2a8468dcfaa9a8eb5297d59cbd7cd (patch) | |
| tree | 081164c2a076347cc91e92f3a0da3b47c48194d9 /llvm/test/CodeGen | |
| parent | d9987c7b0de0ce5ee523427269e1993254ef2353 (diff) | |
| download | bcm5719-llvm-b16b09b154f2a8468dcfaa9a8eb5297d59cbd7cd.tar.gz bcm5719-llvm-b16b09b154f2a8468dcfaa9a8eb5297d59cbd7cd.zip | |
[X86][SSE] Added support for SSE3 lane duplication shuffle instructions
This patch adds shuffle matching for the SSE3 MOVDDUP, MOVSLDUP and MOVSHDUP instructions. The big benefit of these is that they prevent many single source shuffles from needing to use (pre-AVX) dual source instructions such as SHUFPD/SHUFPS, causing extra moves and preventing load folds.
Adding these instructions uncovered an issue in XFormVExtractWithShuffleIntoLoad which crashed on single operand shuffle instructions (now fixed). It also involved fixing getTargetShuffleMask to correctly identify these instructions as unary shuffles.
Also adds a missing tablegen pattern for MOVDDUP.
Differential Revision: http://reviews.llvm.org/D7042
llvm-svn: 226716
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-splat.ll | 42 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-vbroadcast.ll | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sincos-opt.ll | 17 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41.ll | 42 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse_partial_update.ll | 42 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/v2f32.ll | 32 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_cast2.ll | 46 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll | 135 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll | 240 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll | 140 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll | 74 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining.ll | 172 |
12 files changed, 493 insertions, 503 deletions
diff --git a/llvm/test/CodeGen/X86/avx-splat.ll b/llvm/test/CodeGen/X86/avx-splat.ll index 98c1645b908..37fce6c10d8 100644 --- a/llvm/test/CodeGen/X86/avx-splat.ll +++ b/llvm/test/CodeGen/X86/avx-splat.ll @@ -15,37 +15,37 @@ define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> ret <16 x i16> %shuffle -} - -; CHECK: vmovq -; CHECK-NEXT: vunpcklpd %xmm -; CHECK-NEXT: vinsertf128 $1 -define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { -entry: +}
+
+; CHECK: vmovq
+; CHECK-NEXT: vmovddup %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
+entry:
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 - ret <4 x i64> %vecinit6.i -} - -; CHECK: vunpcklpd %xmm -; CHECK-NEXT: vinsertf128 $1 -define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { -entry: + ret <4 x i64> %vecinit6.i
+}
+
+; CHECK: vmovddup %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
+entry:
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 ret <4 x double> %vecinit6.i } - -; Test this turns into a broadcast: -; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> -; -; CHECK: vbroadcastss -define <8 x float> @funcE() nounwind { -allocas: +
+; Test this turns into a broadcast:
+; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+;
+; CHECK: vbroadcastss
+define <8 x float> @funcE() nounwind {
+allocas:
%udx495 = alloca [18 x [18 x float]], align 32 br label %for_test505.preheader diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll index 924c06eba76..a24cdef585a 100644 --- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll @@ -314,13 +314,13 @@ define <2 x i64> @_inreg2xi64(<2 x i64> %a) { define <4 x double> @_inreg4xdouble(<4 x double> %a) { %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %b -} - -;CHECK-LABEL: _inreg2xdouble: -;CHECK: vunpcklpd -;CHECK: ret -define <2 x double> @_inreg2xdouble(<2 x double> %a) { - %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer +}
+
+;CHECK-LABEL: _inreg2xdouble:
+;CHECK: vmovddup
+;CHECK: ret
+define <2 x double> @_inreg2xdouble(<2 x double> %a) {
+ %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %b } diff --git a/llvm/test/CodeGen/X86/sincos-opt.ll b/llvm/test/CodeGen/X86/sincos-opt.ll index 1e34a2be10b..1509f5848f6 100644 --- a/llvm/test/CodeGen/X86/sincos-opt.ll +++ b/llvm/test/CodeGen/X86/sincos-opt.ll @@ -12,15 +12,14 @@ entry: ; GNU_SINCOS: callq sincosf ; GNU_SINCOS: movss 4(%rsp), %xmm0 ; GNU_SINCOS: addss (%rsp), %xmm0 - -; OSX_SINCOS-LABEL: test1: -; OSX_SINCOS: callq ___sincosf_stret -; OSX_SINCOS: movaps %xmm0, %xmm1 -; OSX_SINCOS: shufps {{.*}} ## xmm1 = xmm1[1,1,2,3] -; OSX_SINCOS: addss %xmm0, %xmm1 - -; OSX_NOOPT: test1 -; OSX_NOOPT: callq _sinf +
+; OSX_SINCOS-LABEL: test1:
+; OSX_SINCOS: callq ___sincosf_stret
+; OSX_SINCOS: movshdup {{.*}} xmm1 = xmm0[1,1,3,3]
+; OSX_SINCOS: addss %xmm1, %xmm0
+
+; OSX_NOOPT: test1
+; OSX_NOOPT: callq _sinf
; OSX_NOOPT: callq _cosf %call = tail call float @sinf(float %x) nounwind readnone %call1 = tail call float @cosf(float %x) nounwind readnone diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 9c0c2221cb7..3295e2b206b 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -288,28 +288,26 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone ; This used to compile to insertps $0 + insertps $16. insertps $0 is always ; pointless. -define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind { -; X32-LABEL: buildvector: -; X32: ## BB#0: ## %entry -; X32-NEXT: movaps %xmm0, %xmm2 -; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3] -; X32-NEXT: addss %xmm1, %xmm0 -; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] -; X32-NEXT: addss %xmm2, %xmm1 -; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; X32-NEXT: retl -; -; X64-LABEL: buildvector: -; X64: ## BB#0: ## %entry -; X64-NEXT: movaps %xmm0, %xmm2 -; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3] -; X64-NEXT: addss %xmm1, %xmm0 -; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] -; X64-NEXT: addss %xmm2, %xmm1 -; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; X64-NEXT: retq -entry: - %tmp7 = extractelement <2 x float> %A, i32 0 +define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+; X32-LABEL: buildvector:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X32-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X32-NEXT: addss %xmm1, %xmm0
+; X32-NEXT: addss %xmm2, %xmm3
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: buildvector:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X64-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-NEXT: addss %xmm1, %xmm0
+; X64-NEXT: addss %xmm2, %xmm3
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
+; X64-NEXT: retq
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
%tmp5 = extractelement <2 x float> %A, i32 1 %tmp3 = extractelement <2 x float> %B, i32 0 %tmp1 = extractelement <2 x float> %B, i32 1 diff --git a/llvm/test/CodeGen/X86/sse_partial_update.ll b/llvm/test/CodeGen/X86/sse_partial_update.ll index a88ab014641..648b2d29c4a 100644 --- a/llvm/test/CodeGen/X86/sse_partial_update.ll +++ b/llvm/test/CodeGen/X86/sse_partial_update.ll @@ -9,13 +9,13 @@ define void @rsqrtss(<4 x float> %a) nounwind uwtable ssp { entry: -; CHECK-LABEL: rsqrtss: -; CHECK: rsqrtss %xmm0, %xmm0 -; CHECK-NEXT: cvtss2sd %xmm0 -; CHECK-NEXT: shufps -; CHECK-NEXT: cvtss2sd %xmm0 -; CHECK-NEXT: movap -; CHECK-NEXT: jmp +; CHECK-LABEL: rsqrtss:
+; CHECK: rsqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movshdup
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
%0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind %a.addr.0.extract = extractelement <4 x float> %0, i32 0 @@ -30,13 +30,13 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone define void @rcpss(<4 x float> %a) nounwind uwtable ssp { entry: -; CHECK-LABEL: rcpss: -; CHECK: rcpss %xmm0, %xmm0 -; CHECK-NEXT: cvtss2sd %xmm0 -; CHECK-NEXT: shufps -; CHECK-NEXT: cvtss2sd %xmm0 -; CHECK-NEXT: movap -; CHECK-NEXT: jmp +; CHECK-LABEL: rcpss:
+; CHECK: rcpss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movshdup
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
%0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind %a.addr.0.extract = extractelement <4 x float> %0, i32 0 @@ -50,13 +50,13 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone define void @sqrtss(<4 x float> %a) nounwind uwtable ssp { entry: -; CHECK-LABEL: sqrtss: -; CHECK: sqrtss %xmm0, %xmm0 -; CHECK-NEXT: cvtss2sd %xmm0 -; CHECK-NEXT: shufps -; CHECK-NEXT: cvtss2sd %xmm0 -; CHECK-NEXT: movap -; CHECK-NEXT: jmp +; CHECK-LABEL: sqrtss:
+; CHECK: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movshdup
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
%0 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a) nounwind %a.addr.0.extract = extractelement <4 x float> %0, i32 0 diff --git a/llvm/test/CodeGen/X86/v2f32.ll b/llvm/test/CodeGen/X86/v2f32.ll index b9bd80f949e..27ef7de9f27 100644 --- a/llvm/test/CodeGen/X86/v2f32.ll +++ b/llvm/test/CodeGen/X86/v2f32.ll @@ -2,23 +2,21 @@ ; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -o - | FileCheck %s --check-prefix=X32 ; PR7518 -define void @test1(<2 x float> %Q, float *%P2) nounwind { -; X64-LABEL: test1: -; X64: # BB#0: -; X64-NEXT: movaps %xmm0, %xmm1 -; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] -; X64-NEXT: addss %xmm0, %xmm1 -; X64-NEXT: movss %xmm1, (%rdi) -; X64-NEXT: retq -; -; X32-LABEL: test1: -; X32: # BB#0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movaps %xmm0, %xmm1 -; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] -; X32-NEXT: addss %xmm0, %xmm1 -; X32-NEXT: movss %xmm1, (%eax) -; X32-NEXT: retl +define void @test1(<2 x float> %Q, float *%P2) nounwind {
+; X64-LABEL: test1:
+; X64: # BB#0:
+; X64-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X64-NEXT: addss %xmm0, %xmm1
+; X64-NEXT: movss %xmm1, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: test1:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X32-NEXT: addss %xmm0, %xmm1
+; X32-NEXT: movss %xmm1, (%eax)
+; X32-NEXT: retl
%a = extractelement <2 x float> %Q, i32 0 %b = extractelement <2 x float> %Q, i32 1 %c = fadd float %a, %b diff --git a/llvm/test/CodeGen/X86/vec_cast2.ll b/llvm/test/CodeGen/X86/vec_cast2.ll index 8600c48aaac..0762b3500fd 100644 --- a/llvm/test/CodeGen/X86/vec_cast2.ll +++ b/llvm/test/CodeGen/X86/vec_cast2.ll @@ -115,22 +115,22 @@ define <8 x i8> @foo3_8(<8 x float> %src) { ; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax ; CHECK-WIDE-NEXT: shll $8, %eax ; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx -; CHECK-WIDE-NEXT: movzbl %cl, %ecx -; CHECK-WIDE-NEXT: orl %eax, %ecx -; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] -; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax -; CHECK-WIDE-NEXT: shll $8, %eax -; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx -; CHECK-WIDE-NEXT: movzbl %dl, %edx -; CHECK-WIDE-NEXT: orl %eax, %edx -; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1 -; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1 -; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0 -; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3] -; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax -; CHECK-WIDE-NEXT: shll $8, %eax -; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx +; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
+; CHECK-WIDE-NEXT: movzbl %cl, %ecx
+; CHECK-WIDE-NEXT: orl %eax, %ecx
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT: shll $8, %eax
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
+; CHECK-WIDE-NEXT: movzbl %dl, %edx
+; CHECK-WIDE-NEXT: orl %eax, %edx
+; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
+; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: shll $8, %eax
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
; CHECK-WIDE-NEXT: movzbl %cl, %ecx ; CHECK-WIDE-NEXT: orl %eax, %ecx ; CHECK-WIDE-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1 @@ -160,13 +160,13 @@ define <4 x i8> @foo3_4(<4 x float> %src) { ; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax ; CHECK-WIDE-NEXT: shll $8, %eax ; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx -; CHECK-WIDE-NEXT: movzbl %cl, %ecx -; CHECK-WIDE-NEXT: orl %eax, %ecx -; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] -; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax -; CHECK-WIDE-NEXT: shll $8, %eax -; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx +; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
+; CHECK-WIDE-NEXT: movzbl %cl, %ecx
+; CHECK-WIDE-NEXT: orl %eax, %ecx
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT: shll $8, %eax
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
; CHECK-WIDE-NEXT: movzbl %dl, %edx ; CHECK-WIDE-NEXT: orl %eax, %edx ; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll index 57fa0e85981..d64b6329aae 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -102,28 +102,28 @@ define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) { ; SSE2: # BB#0: ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq -; -; SSE3-LABEL: shuffle_v2f64_00: -; SSE3: # BB#0: -; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE3-NEXT: retq -; -; SSSE3-LABEL: shuffle_v2f64_00: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSSE3-NEXT: retq -; -; SSE41-LABEL: shuffle_v2f64_00: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-NEXT: retq -; -; AVX-LABEL: shuffle_v2f64_00: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX-NEXT: retq - %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0> - ret <2 x double> %shuffle +;
+; SSE3-LABEL: shuffle_v2f64_00:
+; SSE3: # BB#0:
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v2f64_00:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2f64_00:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_00:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %shuffle
} define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_10: @@ -157,31 +157,28 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq -; -; SSE3-LABEL: shuffle_v2f64_22: -; SSE3: # BB#0: -; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] -; SSE3-NEXT: movapd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: shuffle_v2f64_22: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] -; SSSE3-NEXT: movapd %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: shuffle_v2f64_22: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] -; SSE41-NEXT: movapd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX-LABEL: shuffle_v2f64_22: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0] -; AVX-NEXT: retq - %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2> - ret <2 x double> %shuffle +;
+; SSE3-LABEL: shuffle_v2f64_22:
+; SSE3: # BB#0:
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v2f64_22:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2f64_22:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_22:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
+ ret <2 x double> %shuffle
} define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_32: @@ -1061,28 +1058,28 @@ define <2 x double> @insert_dup_reg_v2f64(double %a) { ; SSE2: # BB#0: ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq -; -; SSE3-LABEL: insert_dup_reg_v2f64: -; SSE3: # BB#0: -; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE3-NEXT: retq -; -; SSSE3-LABEL: insert_dup_reg_v2f64: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSSE3-NEXT: retq -; -; SSE41-LABEL: insert_dup_reg_v2f64: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-NEXT: retq -; -; AVX-LABEL: insert_dup_reg_v2f64: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX-NEXT: retq - %v = insertelement <2 x double> undef, double %a, i32 0 - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> +;
+; SSE3-LABEL: insert_dup_reg_v2f64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_reg_v2f64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_reg_v2f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_dup_reg_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %shuffle } define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index bca7fb7a276..0097f6bd70a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -3,13 +3,13 @@ target triple = "x86_64-unknown-unknown" -define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_0000: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; +define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0000:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4f64_0000: ; AVX2: # BB#0: ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 @@ -18,13 +18,13 @@ define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) { ret <4 x double> %shuffle } -define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_0001: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; +define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0001:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4f64_0001: ; AVX2: # BB#0: ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1] @@ -35,13 +35,13 @@ define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) { define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) { ; AVX1-LABEL: shuffle_v4f64_0020: -; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; +; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4f64_0020: ; AVX2: # BB#0: ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0] @@ -67,13 +67,13 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_1000: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; +; AVX1-LABEL: shuffle_v4f64_1000:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4f64_1000: ; AVX2: # BB#0: ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0] @@ -83,13 +83,13 @@ define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_2200: -; AVX1: # BB#0: -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_2200: +; AVX1-LABEL: shuffle_v4f64_2200:
+; AVX1: # BB#0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2: # BB#0: ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0] ; AVX2-NEXT: retq @@ -138,13 +138,13 @@ define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) { ret <4 x double> %shuffle } -define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) { -; ALL-LABEL: shuffle_v4f64_0022: -; ALL: # BB#0: -; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2] -; ALL-NEXT: retq - %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2> - ret <4 x double> %shuffle +define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0022:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x double> %shuffle
} define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) { @@ -183,13 +183,13 @@ define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) { ret <4 x double> %shuffle } -define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_0423: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2] -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3] -; AVX1-NEXT: retq -; +define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0423:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4f64_0423: ; AVX2: # BB#0: ; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 @@ -199,14 +199,14 @@ define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) { ret <4 x double> %shuffle } -define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) { -; ALL-LABEL: shuffle_v4f64_0462: -; ALL: # BB#0: -; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2] -; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2] -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] -; ALL-NEXT: retq - %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2> +define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0462:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
ret <4 x double> %shuffle } @@ -358,13 +358,13 @@ define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) { ret <4 x double> %shuffle } -define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_0000: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; +define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0000:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4i64_0000: ; AVX2: # BB#0: ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 @@ -373,13 +373,13 @@ define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) { ret <4 x i64> %shuffle } -define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_0001: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; +define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0001:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4i64_0001: ; AVX2: # BB#0: ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1] @@ -390,13 +390,13 @@ define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0020: -; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; +; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4i64_0020: ; AVX2: # BB#0: ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0] @@ -438,13 +438,13 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) { } define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_1000: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; +; AVX1-LABEL: shuffle_v4i64_1000:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4i64_1000: ; AVX2: # BB#0: ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0] @@ -454,13 +454,13 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) { } define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_2200: -; AVX1: # BB#0: -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4i64_2200: +; AVX1-LABEL: shuffle_v4i64_2200:
+; AVX1: # BB#0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2: # BB#0: ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0] ; AVX2-NEXT: retq @@ -500,13 +500,13 @@ define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) { ret <4 x i64> %shuffle } -define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_0124: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3] -; AVX1-NEXT: retq +define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0124:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
+; AVX1-NEXT: retq
; ; AVX2-LABEL: shuffle_v4i64_0124: ; AVX2: # BB#0: @@ -538,13 +538,13 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0412: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2] -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3] -; AVX1-NEXT: retq -; +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: shuffle_v4i64_0412: ; AVX2: # BB#0: ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2] @@ -557,13 +557,13 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_4012: -; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3] -; AVX1-NEXT: retq +; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; AVX1-NEXT: retq
; ; AVX2-LABEL: shuffle_v4i64_4012: ; AVX2: # BB#0: @@ -872,13 +872,13 @@ define <4 x double> @splat_mem_v4f64_2(double* %p) { ret <4 x double> %3 } -define <4 x double> @splat_v4f64(<2 x double> %r) { -; AVX1-LABEL: splat_v4f64: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; +define <4 x double> @splat_v4f64(<2 x double> %r) {
+; AVX1-LABEL: splat_v4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: splat_v4f64: ; AVX2: # BB#0: ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index e4bd4c4f817..fa931449420 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -145,13 +145,13 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { ret <8 x float> %shuffle } -define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) { -; ALL-LABEL: shuffle_v8f32_01014545: -; ALL: # BB#0: -; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2] -; ALL-NEXT: retq - %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> - ret <8 x float> %shuffle +define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_01014545:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
+ ret <8 x float> %shuffle
} define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) { @@ -199,13 +199,13 @@ define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) { ; AVX1-LABEL: shuffle_v8f32_08080808: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX1-NEXT: retq +; AVX1: # BB#0:
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT: retq
; ; AVX2-LABEL: shuffle_v8f32_08080808: ; AVX2: # BB#0: @@ -333,13 +333,13 @@ define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) { ret <8 x float> %shuffle } -define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) { -; AVX1-LABEL: shuffle_v8f32_09ab1def: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] -; AVX1-NEXT: retq +define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_09ab1def:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX1-NEXT: retq
; ; AVX2-LABEL: shuffle_v8f32_09ab1def: ; AVX2: # BB#0: @@ -423,13 +423,13 @@ define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) { ret <8 x float> %shuffle } -define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) { -; ALL-LABEL: shuffle_v8f32_00224466: -; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] -; ALL-NEXT: retq - %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> - ret <8 x float> %shuffle +define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00224466:
+; ALL: # BB#0:
+; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ ret <8 x float> %shuffle
} define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { @@ -441,13 +441,13 @@ define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { ret <8 x float> %shuffle } -define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) { -; ALL-LABEL: shuffle_v8f32_11335577: -; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] -; ALL-NEXT: retq - %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> - ret <8 x float> %shuffle +define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_11335577:
+; ALL: # BB#0:
+; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ ret <8 x float> %shuffle
} define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) { @@ -937,13 +937,13 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shuffle } -define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_01014545: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v8i32_01014545: +define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_01014545:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_01014545:
; AVX2: # BB#0: ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] ; AVX2-NEXT: retq @@ -1001,13 +1001,13 @@ define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { ; AVX1-LABEL: shuffle_v8i32_08080808: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX1-NEXT: retq +; AVX1: # BB#0:
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT: retq
; ; AVX2-LABEL: shuffle_v8i32_08080808: ; AVX2: # BB#0: @@ -1172,13 +1172,13 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shuffle } -define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_09ab1def: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] -; AVX1-NEXT: retq +define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_09ab1def:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX1-NEXT: retq
; ; AVX2-LABEL: shuffle_v8i32_09ab1def: ; AVX2: # BB#0: @@ -1302,13 +1302,13 @@ define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shuffle } -define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_00224466: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v8i32_00224466: +define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00224466:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_00224466:
; AVX2: # BB#0: ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] ; AVX2-NEXT: retq @@ -1330,13 +1330,13 @@ define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shuffle } -define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_11335577: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v8i32_11335577: +define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_11335577:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_11335577:
; AVX2: # BB#0: ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] ; AVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index fd4c9cb72ba..f3c64da044f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -404,15 +404,15 @@ define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) { ret <8 x double> %shuffle } -define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) { -; ALL-LABEL: shuffle_v8f64_00224466: -; ALL: # BB#0: -; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2] -; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2] -; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 -; ALL-NEXT: retq - %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> +define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00224466:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x double> %shuffle } @@ -559,13 +559,13 @@ define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) { ret <8 x double> %shuffle } -define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) { -; ALL-LABEL: shuffle_v8f64_00226644: -; ALL: # BB#0: -; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2] -; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0] -; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00226644:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> ret <8 x double> %shuffle @@ -615,13 +615,13 @@ define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) { ret <8 x double> %shuffle } -define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) { -; ALL-LABEL: shuffle_v8f64_002u6u44: -; ALL: # BB#0: -; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2] -; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0] -; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_002u6u44:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
+; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> ret <8 x double> %shuffle @@ -673,13 +673,13 @@ define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) { } define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) { -; ALL-LABEL: shuffle_v8f64_uuu3uu66: -; ALL: # BB#0: -; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2] -; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 -; ALL-NEXT: retq - %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> +; ALL-LABEL: shuffle_v8f64_uuu3uu66:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
ret <8 x double> %shuffle } @@ -705,13 +705,13 @@ define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) { ; ALL-LABEL: shuffle_v8f64_f511235a: ; ALL: # BB#0: ; ALL-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] -; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3 -; ALL-NEXT: vpermpd {{.*#+}} ymm4 = ymm3[0,1,1,3] -; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3] -; ALL-NEXT: vpermilpd {{.*#+}} ymm4 = ymm1[0,0,2,2] -; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3] -; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1] -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2,3] +; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; ALL-NEXT: vpermpd {{.*#+}} ymm4 = ymm3[0,1,1,3]
+; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3]
+; ALL-NEXT: vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
+; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3]
+; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2,3]
; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1 ; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3] ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 4e2bf87fdf6..109372c836b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1641,25 +1641,23 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) { ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq -; -; SSSE3-LABEL: combine_test2b: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] -; SSSE3-NEXT: movapd %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: combine_test2b: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] -; SSE41-NEXT: movapd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX-LABEL: combine_test2b: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0] -; AVX-NEXT: retq - %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> - %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5> +;
+; SSSE3-LABEL: combine_test2b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_test2b:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: combine_test2b:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
+; AVX-NEXT: retq
+ %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
ret <4 x float> %2 } @@ -2178,23 +2176,23 @@ define <4 x float> @combine_undef_input_test7(<4 x float> %a) { ; SSE2: # BB#0: ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq -; -; SSSE3-LABEL: combine_undef_input_test7: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSSE3-NEXT: retq -; -; SSE41-LABEL: combine_undef_input_test7: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-NEXT: retq -; -; AVX-LABEL: combine_undef_input_test7: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX-NEXT: retq - %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7> - %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5> +;
+; SSSE3-LABEL: combine_undef_input_test7:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_undef_input_test7:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: combine_undef_input_test7:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
+ %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+ %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
ret <4 x float> %2 } @@ -2203,23 +2201,23 @@ define <4 x float> @combine_undef_input_test8(<4 x float> %a) { ; SSE2: # BB#0: ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq -; -; SSSE3-LABEL: combine_undef_input_test8: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSSE3-NEXT: retq -; -; SSE41-LABEL: combine_undef_input_test8: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-NEXT: retq -; -; AVX-LABEL: combine_undef_input_test8: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX-NEXT: retq - %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> - %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1> +;
+; SSSE3-LABEL: combine_undef_input_test8:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_undef_input_test8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: combine_undef_input_test8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
+ %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+ %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
ret <4 x float> %2 } @@ -2369,23 +2367,23 @@ define <4 x float> @combine_undef_input_test17(<4 x float> %a) { ; SSE2: # BB#0: ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq -; -; SSSE3-LABEL: combine_undef_input_test17: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSSE3-NEXT: retq -; -; SSE41-LABEL: combine_undef_input_test17: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-NEXT: retq -; -; AVX-LABEL: combine_undef_input_test17: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX-NEXT: retq - %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7> - %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1> +;
+; SSSE3-LABEL: combine_undef_input_test17:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_undef_input_test17:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: combine_undef_input_test17:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
+ %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+ %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
ret <4 x float> %2 } @@ -2394,23 +2392,23 @@ define <4 x float> @combine_undef_input_test18(<4 x float> %a) { ; SSE2: # BB#0: ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq -; -; SSSE3-LABEL: combine_undef_input_test18: -; SSSE3: # BB#0: -; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSSE3-NEXT: retq -; -; SSE41-LABEL: combine_undef_input_test18: -; SSE41: # BB#0: -; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-NEXT: retq -; -; AVX-LABEL: combine_undef_input_test18: -; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] -; AVX-NEXT: retq - %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> - %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5> +;
+; SSSE3-LABEL: combine_undef_input_test18:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_undef_input_test18:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: combine_undef_input_test18:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
+ %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+ %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
ret <4 x float> %2 } |

