summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll')
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll260
1 files changed, 109 insertions, 151 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index a09480686d8..804f4522385 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -159,7 +159,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -188,7 +188,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
@@ -216,7 +216,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
@@ -243,7 +243,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
@@ -489,17 +489,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <16 x i16> %shuffle
}
@@ -511,17 +505,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <16 x i16> %shuffle
}
@@ -533,17 +521,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
}
@@ -555,17 +537,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
}
@@ -577,17 +553,11 @@ define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
}
@@ -599,17 +569,11 @@ define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
}
@@ -621,17 +585,11 @@ define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
}
@@ -1835,7 +1793,7 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1
;
; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -1863,7 +1821,7 @@ define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_0
;
; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
@@ -1893,7 +1851,7 @@ define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_2
; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
@@ -1921,7 +1879,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
@@ -1953,7 +1911,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
@@ -1983,7 +1941,7 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1
;
; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
@@ -2011,9 +1969,9 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1
;
; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
@@ -2039,7 +1997,7 @@ define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_1
;
; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,1,2,0,4,5,6,7,11,9,10,8,12,13,14,15]
@@ -2068,7 +2026,7 @@ define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_0
;
; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,24,25,24,25,24,25,24,25,16,17,16,17,16,17,16,17]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2096,7 +2054,7 @@ define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_1
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
@@ -2125,7 +2083,7 @@ define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_1
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255>
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,2,4,5,6,7,10,11,8,10,12,13,14,15]
@@ -2154,7 +2112,7 @@ define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_1
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2183,7 +2141,7 @@ define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_0
;
; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1,30,31,26,27,28,29,24,25,22,23,18,19,20,21,16,17]
@@ -2211,7 +2169,7 @@ define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_0
;
; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1,18,19,16,17,26,27,24,25,26,27,24,25,18,19,16,17]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2240,7 +2198,7 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_0
;
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1,26,27,24,25,18,19,16,17,26,27,24,25,18,19,16,17]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2269,7 +2227,7 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_1
;
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9,26,27,24,25,18,19,16,17,18,19,16,17,26,27,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2298,7 +2256,7 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_0
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1,16,17,24,25,24,25,16,17,16,17,24,25,24,25,16,17]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2327,7 +2285,7 @@ define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_1
;
; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9,24,25,16,17,16,17,24,25,24,25,16,17,16,17,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2356,7 +2314,7 @@ define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_1
;
; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7,20,21,28,29,24,25,16,17,26,27,18,19,30,31,22,23]
@@ -2384,7 +2342,7 @@ define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_1
;
; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7,20,21,16,17,28,29,24,25,26,27,18,19,30,31,22,23]
@@ -2412,7 +2370,7 @@ define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_1
;
; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11,20,21,28,29,24,25,16,17,18,19,22,23,30,31,26,27]
@@ -2440,7 +2398,7 @@ define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_1
;
; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
; AVX2-NEXT: retq
@@ -2467,7 +2425,7 @@ define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,24,25,24,25,24,25,24,25,24,25,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2496,7 +2454,7 @@ define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9,24,25,24,25,16,17,16,17,24,25,24,25,24,25,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2525,7 +2483,7 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9,16,17,24,25,24,25,16,17,24,25,24,25,24,25,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2554,7 +2512,7 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_0
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1,16,17,24,25,24,25,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2584,7 +2542,7 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_1
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,2,0,4,5,6,7,8,10,10,8,12,13,14,15]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -2613,7 +2571,7 @@ define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9,16,17,18,19,24,25,24,25,24,25,24,25,24,25,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,255,255,255,255,0,0,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255>
@@ -2642,7 +2600,7 @@ define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9,24,25,24,25,24,25,16,17,24,25,24,25,24,25,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,255,255,u,u,255,255,255,255,255,255,255,255,0,0,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255>
@@ -2671,7 +2629,7 @@ define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9,16,17,24,25,24,25,16,17,24,25,24,25,24,25,24,25]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,255,255,255,255,255,255,255,255,255,255,255,255,0,0,u,u,255,255,255,255,255,255,255,255,255,255,255,255,255,255>
@@ -2792,7 +2750,7 @@ define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_1
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7,16,17,18,19,20,21,30,31,24,25,26,27,28,29,22,23]
@@ -2819,10 +2777,10 @@ define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_1
;
; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15,24,25,26,27,28,29,22,23,16,17,18,19,20,21,30,31]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15,24,25,26,27,28,29,22,23,16,17,18,19,20,21,30,31]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
@@ -2847,7 +2805,7 @@ define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_1
;
; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
; AVX2-NEXT: retq
@@ -2875,7 +2833,7 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2
;
; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
@@ -2906,10 +2864,10 @@ define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_3
; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
@@ -2935,10 +2893,10 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3
;
; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6,7]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
;
@@ -2967,7 +2925,7 @@ define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_2
; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
@@ -3001,7 +2959,7 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_3
;
; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,1,4,5,6,7,8,8,10,9,12,13,14,15]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,6,6,7,8,9,10,11,12,14,14,15]
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7]
@@ -3036,7 +2994,7 @@ define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_2
;
; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,0,0,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3,24,25,24,25,20,21,26,27,16,17,16,17,28,29,18,19]
@@ -3071,7 +3029,7 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2
;
; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,0,0,255,255,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5,16,17,18,19,18,19,16,17,24,25,26,27,22,23,20,21]
@@ -3102,7 +3060,7 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1
;
; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
@@ -3132,10 +3090,10 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1
;
; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6,7]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
; AVX2-NEXT: retq
;
@@ -3168,7 +3126,7 @@ define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_3
; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
@@ -3349,7 +3307,7 @@ define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_1
; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255,255,255,u,u>
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7,16,17,18,19,20,21,26,27,24,25,26,27,28,29,22,23]
@@ -3469,7 +3427,7 @@ define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_1
; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
@@ -3514,7 +3472,7 @@ define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_1
;
; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
@@ -3579,7 +3537,7 @@ define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_1
; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
@@ -3625,7 +3583,7 @@ define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_1
;
; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
@@ -3690,7 +3648,7 @@ define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_2
; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
@@ -3737,7 +3695,7 @@ define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_2
; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
@@ -4022,16 +3980,16 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) {
;
; AVX2-LABEL: PR24935:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0>
-; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
-; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
-; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,6,7,8,9,10,11,13,13,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1,2],ymm2[3],ymm0[4],ymm2[5,6,7,8],ymm0[9,10],ymm2[11],ymm0[12],ymm2[13,14,15]
+; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,6,7,8,9,10,11,13,13,14,15]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -4058,7 +4016,7 @@ define <16 x i16> @PR34369(<16 x i16> %vec) {
;
; AVX2-LABEL: PR34369:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <255,255,u,u,0,0,255,255,u,u,255,255,u,u,u,u,255,255,255,255,255,255,u,u,255,255,u,u,u,u,255,255>
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1],zero,zero,ymm0[10,11],zero,zero,zero,zero,ymm0[4,5,30,31,16,17],zero,zero,ymm0[16,17,18,19,20,21,24,25,24,25]
OpenPOWER on IntegriCloud