author     Chandler Carruth <chandlerc@gmail.com>  2014-10-01 00:41:21 +0000
committer  Chandler Carruth <chandlerc@gmail.com>  2014-10-01 00:41:21 +0000
commit     b9d3fa1e651bb965dcbd16d71116f847867ef153 (patch)
tree       18cc91babe1ed3179f1a0319e2475e14986e7a13 /llvm/test
parent     46d2e0e53d2683e975ab787f82be85c611925ac2 (diff)
[x86] Teach the new vector shuffle lowering about VBROADCAST and
VPBROADCAST.

This has the somewhat expected pervasive impact. I don't know why I
forgot about this. Everything seems good with lots of significant
improvements in the tests.

llvm-svn: 218724
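As a minimal sketch of the pattern this change targets (an illustrative
function, not one of the tests in this commit): a shufflevector whose mask
selects lane 0 for every output lane is a splat, and on AVX2 the new lowering
emits it as a single register broadcast instead of a vpxor + vpshufb pair, as
the updated CHECK lines below show.

; Illustrative splat, not part of this commit: every output lane reads
; element 0 of %a.
define <16 x i8> @splat_v16i8_example(<16 x i8> %a) {
  %splat = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  ; AVX2 after this patch: vpbroadcastb %xmm0, %xmm0
  ; AVX1 (unchanged):      vpxor %xmm1, %xmm1, %xmm1 / vpshufb %xmm1, %xmm0, %xmm0
  ret <16 x i8> %splat
}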
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll |  46
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll  |  26
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll  |  82
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll |  41
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll | 196
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll  |  54
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll  |  16
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll  | 112
8 files changed, 310 insertions(+), 263 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 306ef8a220f..d7373602431 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -36,11 +36,16 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
-; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %shuffle
}
@@ -200,10 +205,15 @@ define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v16i8_0101010101010101:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v16i8_0101010101010101:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i8_0101010101010101:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <16 x i8> %shuffle
}
@@ -231,12 +241,18 @@ define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
-; AVX: # BB#0:
-; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
ret <16 x i8> %shuffle
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 6f891c12505..67723dd07b2 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -14,10 +14,15 @@ define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v2i64_00:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v2i64_00:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2i64_00:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
ret <2 x i64> %shuffle
}
@@ -53,10 +58,15 @@ define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v2i64_22:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v2i64_22:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2i64_22:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
+; AVX2-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
ret <2 x i64> %shuffle
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index d0c4a3f9d36..7d6e2bb1124 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -77,10 +77,15 @@ define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_00000000:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_00000000:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i16_00000000:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %shuffle
}
@@ -1169,12 +1174,19 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_XXXXcde3:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
}
@@ -1205,12 +1217,19 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_cde3XXXX:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i16_cde3XXXX:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@@ -1254,16 +1273,27 @@ define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_012dcde3:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; AVX-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_012dcde3:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i16_012dcde3:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm0, %xmm2
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 0cb73521503..cc449899110 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -12,8 +12,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -31,10 +30,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
@@ -51,7 +48,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -69,7 +66,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -87,7 +84,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -105,7 +102,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -123,7 +120,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -141,7 +138,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -165,7 +162,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -189,7 +186,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,2,3,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,u,u,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -214,7 +211,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,4,5,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,u,u,0,1,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -239,7 +236,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,6,7,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,u,u,0,1,0,1,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -264,7 +261,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,u,u,0,1,0,1,0,1,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -289,7 +286,7 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,10,11,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,u,u,0,1,0,1,0,1,0,1,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -315,7 +312,7 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,12,13,u,u,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,0,1,0,1,0,1,0,1,0,1,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -341,7 +338,7 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[14,15,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,0,1,0,1,0,1,0,1,0,1,0,1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -735,8 +732,8 @@ define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_1
;
; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index fab36b2b273..687928f0a7c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -13,9 +13,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -32,8 +30,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -52,8 +49,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -72,8 +68,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -92,8 +87,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -112,8 +106,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -132,8 +125,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -152,8 +144,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -172,8 +163,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -192,8 +182,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -212,8 +201,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -232,8 +220,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -252,8 +239,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -272,8 +258,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -292,8 +277,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -317,8 +301,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: movl $15, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -339,12 +322,12 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -364,12 +347,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,1,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,1,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -389,12 +371,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,2,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,2,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -414,12 +395,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,3,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,3,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -439,12 +419,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,4,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,4,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,255,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -464,12 +443,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,5,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,5,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,255,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -489,12 +467,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,6,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,6,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -514,12 +491,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,7,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,7,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -539,12 +515,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,8,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,8,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -564,12 +539,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,9,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,9,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -589,12 +563,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,10,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,10,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -614,12 +587,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,11,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,11,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,255,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -639,12 +611,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,12,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,12,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -664,12 +635,11 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,13,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,13,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -689,12 +659,11 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -722,7 +691,7 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
; AVX2-NEXT: vinserti128 $0, %xmm2, %ymm3, %ymm2
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -1037,9 +1006,10 @@ define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_
;
; AVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 74662ac82e4..5c5bc21a48f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -12,7 +12,7 @@ define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2: # BB#0:
-; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x double> %shuffle
@@ -184,11 +184,17 @@ define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
}
define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
-; ALL-LABEL: shuffle_v4f64_0423:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4f64_0423:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_0423:
+; AVX2: # BB#0:
+; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
+; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
ret <4 x double> %shuffle
}
@@ -317,7 +323,7 @@ define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2: # BB#0:
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
@@ -460,7 +466,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2: # BB#0:
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,0]
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
@@ -477,8 +483,8 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2: # BB#0:
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,0,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
@@ -497,8 +503,8 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
@@ -731,3 +737,31 @@ define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x double> %shuffle
}
+
+define <4 x double> @splat_mem_v4f64(double* %ptr) {
+; ALL-LABEL: splat_mem_v4f64:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT: retq
+ %a = load double* %ptr
+ %v = insertelement <4 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x double> %shuffle
+}
+
+define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
+; AVX1-LABEL: splat_mem_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovddup (%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splat_mem_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT: retq
+ %a = load i64* %ptr
+ %v = insertelement <4 x i64> undef, i64 %a, i64 0
+ %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x i64> %shuffle
+}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index b9a2de0298b..f11554cc2fc 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -12,8 +12,7 @@ define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
;
; AVX2-LABEL: shuffle_v8f32_00000000:
; AVX2: # BB#0:
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x float> %shuffle
@@ -210,9 +209,8 @@ define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
;
; AVX2-LABEL: shuffle_v8f32_08080808:
; AVX2: # BB#0:
-; AVX2-NEXT: vxorps %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
@@ -702,8 +700,7 @@ define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-LABEL: shuffle_v8i32_00000000:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i32> %shuffle
@@ -910,9 +907,8 @@ define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-LABEL: shuffle_v8i32_08080808:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 09dde3132dd..2f02f2fc08f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown"
define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000000:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -16,7 +16,7 @@ define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000010:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -27,7 +27,7 @@ define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000200:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -38,7 +38,7 @@ define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00003000:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -50,10 +50,9 @@ define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00040000:
; ALL: # BB#0:
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,2,0]
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[0,0,0,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
@@ -65,9 +64,8 @@ define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[0,0,2,0]
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -79,9 +77,8 @@ define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[0,1,0,0]
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -93,7 +90,7 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -127,7 +124,7 @@ define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00001111:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -151,8 +148,8 @@ define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08080808:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,0]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,3]
+; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -164,12 +161,12 @@ define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08084c4c:
; ALL: # BB#0:
; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,0]
+; ALL-NEXT: vbroadcastsd %xmm2, %ymm2
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,0,3]
+; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,0]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,3]
+; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -182,9 +179,9 @@ define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpermilpd {{.*#+}} ymm3 = ymm3[0,0,2,2]
+; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -513,7 +510,7 @@ define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
@@ -638,7 +635,7 @@ define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00uu66uu:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
@@ -694,16 +691,16 @@ define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_c348cda0:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,1,0,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[0,3,2,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2],ymm3[3]
+; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[0,3,2,3]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
+; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3]
; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpermpd {{.*#+}} ymm4 = ymm1[0,1,2,0]
+; ALL-NEXT: vbroadcastsd %xmm1, %ymm4
; ALL-NEXT: vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2],ymm3[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
+; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT: retq
@@ -734,7 +731,7 @@ define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000000:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -744,7 +741,7 @@ define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000010:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -755,7 +752,7 @@ define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000200:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -766,7 +763,7 @@ define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00003000:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -778,10 +775,9 @@ define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00040000:
; ALL: # BB#0:
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,0]
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm0[0,0,0,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5],ymm1[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
@@ -793,9 +789,8 @@ define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm0[0,0,2,0]
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -807,9 +802,8 @@ define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm0[0,1,0,0]
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3],ymm2[4,5,6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -821,7 +815,7 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -855,7 +849,7 @@ define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00001111:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -879,8 +873,8 @@ define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08080808:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,0]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,3]
+; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -892,12 +886,12 @@ define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08084c4c:
; ALL: # BB#0:
; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,2,0]
+; ALL-NEXT: vpbroadcastq %xmm2, %ymm2
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,0,3]
+; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,0]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,3]
+; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -910,9 +904,9 @@ define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
; ALL: # BB#0:
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,1,0,1,4,5,4,5]
+; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
+; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1241,7 +1235,7 @@ define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,0]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
@@ -1366,7 +1360,7 @@ define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00uu66uu:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
@@ -1427,8 +1421,8 @@ define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm4 = ymm0[0,3,2,1]
; ALL-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1],ymm4[2,3],ymm3[4,5],ymm4[6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
+; ALL-NEXT: vpbroadcastq %xmm2, %ymm2
; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3],ymm1[4,5,6,7]
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]