summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll')
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll227
1 files changed, 137 insertions, 90 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 072d71fae57..fc22040578b 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -63,14 +63,14 @@ define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00000000:
; SSE: # %bb.0:
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
@@ -1123,33 +1123,44 @@ define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
}
define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: shuffle_v8i16_0213cedf:
-; SSE: # %bb.0:
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v8i16_0213cedf:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0213cedf:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
+; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_0213cedf:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_0213cedf:
; AVX1: # %bb.0:
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-FAST-NEXT: retq
@@ -1157,14 +1168,14 @@ define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15]
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
; AVX512VL-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-FAST-NEXT: retq
@@ -2111,79 +2122,115 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
}
define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
-; SSE2-LABEL: shuffle_v8i16_01100110:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4]
-; SSE2-NEXT: retq
+; SSE-LABEL: shuffle_v8i16_01100110:
+; SSE: # %bb.0:
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT: retq
;
-; SSSE3-LABEL: shuffle_v8i16_01100110:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
-; SSSE3-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_01100110:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: retq
;
-; SSE41-LABEL: shuffle_v8i16_01100110:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
-; SSE41-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i16_01100110:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_01100110:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
-; AVX-NEXT: retq
+; AVX2-FAST-LABEL: shuffle_v8i16_01100110:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
-; SSE2-LABEL: shuffle_v8i16_01u0u110:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4]
-; SSE2-NEXT: retq
+; SSE-LABEL: shuffle_v8i16_01u0u110:
+; SSE: # %bb.0:
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT: retq
;
-; SSSE3-LABEL: shuffle_v8i16_01u0u110:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1]
-; SSSE3-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_01u0u110:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: retq
;
-; SSE41-LABEL: shuffle_v8i16_01u0u110:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1]
-; SSE41-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_01u0u110:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1]
-; AVX-NEXT: retq
+; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
-; SSE2-LABEL: shuffle_v8i16_467uu675:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,3,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
-; SSE2-NEXT: retq
+; SSE-LABEL: shuffle_v8i16_467uu675:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT: retq
;
-; SSSE3-LABEL: shuffle_v8i16_467uu675:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11]
-; SSSE3-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_467uu675:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX1-NEXT: retq
;
-; SSE41-LABEL: shuffle_v8i16_467uu675:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11]
-; SSE41-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX2-SLOW-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_467uu675:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11]
-; AVX-NEXT: retq
+; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
ret <8 x i16> %shuffle
}
@@ -2471,15 +2518,15 @@ define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
; SSE-LABEL: insert_dup_mem_v8i16_i32:
; SSE: # %bb.0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v8i16_i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
@@ -2498,8 +2545,8 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2: # %bb.0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
@@ -2547,15 +2594,15 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
; SSE: # %bb.0:
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
@@ -2574,8 +2621,8 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
; SSE2: # %bb.0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
@@ -2612,8 +2659,8 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2: # %bb.0:
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
@@ -2665,8 +2712,8 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2-NEXT: movswl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
OpenPOWER on IntegriCloud