diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll | 227 |
1 files changed, 137 insertions, 90 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index 072d71fae57..fc22040578b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -63,14 +63,14 @@ define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) { ; SSE-LABEL: shuffle_v8i16_00000000: ; SSE: # %bb.0: -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: shuffle_v8i16_00000000: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v8i16_00000000: @@ -1123,33 +1123,44 @@ define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) { } define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { -; SSE-LABEL: shuffle_v8i16_0213cedf: -; SSE: # %bb.0: -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE-NEXT: retq +; SSE2-LABEL: shuffle_v8i16_0213cedf: +; SSE2: # %bb.0: +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7] +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_0213cedf: +; SSSE3: # %bb.0: +; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7] +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_0213cedf: +; SSE41: # %bb.0: +; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v8i16_0213cedf: ; AVX1: # %bb.0: +; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf: ; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15] ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] ; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX2-FAST-NEXT: retq @@ -1157,14 +1168,14 @@ define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { ; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf: ; AVX512VL-SLOW: # %bb.0: ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] ; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-SLOW-NEXT: retq ; ; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf: ; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15] +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15] ; AVX512VL-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] ; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-FAST-NEXT: retq @@ -2111,79 +2122,115 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { } define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) { -; SSE2-LABEL: shuffle_v8i16_01100110: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4] -; SSE2-NEXT: retq +; SSE-LABEL: shuffle_v8i16_01100110: +; SSE: # %bb.0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE-NEXT: retq ; -; SSSE3-LABEL: shuffle_v8i16_01100110: -; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] -; SSSE3-NEXT: retq +; AVX1-LABEL: shuffle_v8i16_01100110: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX1-NEXT: retq ; -; SSE41-LABEL: shuffle_v8i16_01100110: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] -; SSE41-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v8i16_01100110: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX2-SLOW-NEXT: retq ; -; AVX-LABEL: shuffle_v8i16_01100110: -; AVX: # %bb.0: -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] -; AVX-NEXT: retq +; AVX2-FAST-LABEL: shuffle_v8i16_01100110: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] +; AVX2-FAST-NEXT: retq +; +; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] +; AVX512VL-FAST-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) { -; SSE2-LABEL: shuffle_v8i16_01u0u110: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4] -; SSE2-NEXT: retq +; SSE-LABEL: shuffle_v8i16_01u0u110: +; SSE: # %bb.0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE-NEXT: retq ; -; SSSE3-LABEL: shuffle_v8i16_01u0u110: -; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1] -; SSSE3-NEXT: retq +; AVX1-LABEL: shuffle_v8i16_01u0u110: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX1-NEXT: retq ; -; SSE41-LABEL: shuffle_v8i16_01u0u110: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1] -; SSE41-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX2-SLOW-NEXT: retq ; -; AVX-LABEL: shuffle_v8i16_01u0u110: -; AVX: # %bb.0: -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1] -; AVX-NEXT: retq +; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] +; AVX2-FAST-NEXT: retq +; +; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] +; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] +; AVX512VL-FAST-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) { -; SSE2-LABEL: shuffle_v8i16_467uu675: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,3,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] -; SSE2-NEXT: retq +; SSE-LABEL: shuffle_v8i16_467uu675: +; SSE: # %bb.0: +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; SSE-NEXT: retq ; -; SSSE3-LABEL: shuffle_v8i16_467uu675: -; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11] -; SSSE3-NEXT: retq +; AVX1-LABEL: shuffle_v8i16_467uu675: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX1-NEXT: retq ; -; SSE41-LABEL: shuffle_v8i16_467uu675: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11] -; SSE41-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX2-SLOW-NEXT: retq ; -; AVX-LABEL: shuffle_v8i16_467uu675: -; AVX: # %bb.0: -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11] -; AVX-NEXT: retq +; AVX2-FAST-LABEL: shuffle_v8i16_467uu675: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11] +; AVX2-FAST-NEXT: retq +; +; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5] +; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11] +; AVX512VL-FAST-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5> ret <8 x i16> %shuffle } @@ -2471,15 +2518,15 @@ define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) { ; SSE-LABEL: insert_dup_mem_v8i16_i32: ; SSE: # %bb.0: ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: insert_dup_mem_v8i16_i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32: @@ -2498,8 +2545,8 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) { ; SSE2: # %bb.0: ; SSE2-NEXT: movswl (%rdi), %eax ; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16: @@ -2547,15 +2594,15 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) { ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32: ; SSE: # %bb.0: ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32: @@ -2574,8 +2621,8 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) { ; SSE2: # %bb.0: ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32: @@ -2612,8 +2659,8 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) { ; SSE2: # %bb.0: ; SSE2-NEXT: movswl (%rdi), %eax ; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: @@ -2665,8 +2712,8 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) { ; SSE2-NEXT: movswl (%rdi), %eax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: |

