Diffstat (limited to 'llvm/test/CodeGen/X86/vector-fshl-256.ll')
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-256.ll  486
1 file changed, 456 insertions(+), 30 deletions(-)
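
The diff below adds AVX512VBMI2 and AVX512VLVBMI2 RUN lines, with matching CHECK prefixes, to this 256-bit funnel-shift-left test. For orientation, here is a minimal sketch of the kind of IR each test function in this file exercises; the exact bodies are not shown in this excerpt, so the nounwind attribute and the intrinsic declaration are assumptions inferred from the define lines visible in the hunk headers:

define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) nounwind {
  ; llvm.fshl concatenates x:y, shifts the pair left by amt modulo the element width,
  ; and returns the upper half; this is what the vpshldv*/vpshld* lowerings implement.
  %res = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
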
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index aac4662cff0..8aeae700d6a 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -4,7 +4,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
@@ -113,6 +115,22 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: var_funnnel_v4i64:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpsllvq %ymm4, %ymm0, %ymm5
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm6 = [64,64,64,64]
+; AVX512VBMI2-NEXT: vpsubq %ymm4, %ymm6, %ymm4
+; AVX512VBMI2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm5, %ymm1
+; AVX512VBMI2-NEXT: vptestnmq %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: var_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
@@ -127,6 +145,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: var_funnnel_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -278,6 +301,22 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt)
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: var_funnnel_v8i32:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpsllvd %ymm4, %ymm0, %ymm5
+; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32]
+; AVX512VBMI2-NEXT: vpsubd %ymm4, %ymm6, %ymm4
+; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm5, %ymm1
+; AVX512VBMI2-NEXT: vptestnmd %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: var_funnnel_v8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
@@ -292,6 +331,11 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt)
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: var_funnnel_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -485,6 +529,23 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: var_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpsllvw %zmm4, %zmm0, %zmm5
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512VBMI2-NEXT: vpsubw %ymm4, %ymm6, %ymm4
+; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm5, %ymm1
+; AVX512VBMI2-NEXT: vptestnmw %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -499,6 +560,11 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: var_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -745,6 +811,27 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: var_funnnel_v32i8:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VBMI2-NEXT: vpsllvw %zmm5, %zmm6, %zmm5
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VBMI2-NEXT: vpsubb %ymm4, %ymm6, %ymm4
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vporq %zmm1, %zmm5, %zmm1
+; AVX512VBMI2-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512VBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: var_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
@@ -764,6 +851,25 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VLVBMI2-NEXT: vpsllvw %zmm5, %zmm6, %zmm5
+; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VLVBMI2-NEXT: vpsubb %ymm4, %ymm6, %ymm4
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512VLVBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm5, %zmm1
+; AVX512VLVBMI2-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512VLVBMI2-NEXT: vptestnmb %ymm3, %ymm2, %k1
+; AVX512VLVBMI2-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: var_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -906,6 +1012,22 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpsllq %xmm4, %ymm0, %ymm5
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
+; AVX512VBMI2-NEXT: vpsubq %xmm4, %xmm6, %xmm4
+; AVX512VBMI2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm5, %ymm1
+; AVX512VBMI2-NEXT: vptestnmq %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2
@@ -921,6 +1043,12 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm2 = xmm2[0,0]
@@ -1064,6 +1192,24 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
+; AVX512VBMI2-NEXT: vpslld %xmm5, %ymm0, %ymm5
+; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
+; AVX512VBMI2-NEXT: vpsubd %xmm4, %xmm6, %xmm4
+; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
+; AVX512VBMI2-NEXT: vpsrld %xmm4, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm5, %ymm1
+; AVX512VBMI2-NEXT: vptestnmd %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %ymm2
@@ -1081,6 +1227,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0]
@@ -1228,6 +1380,24 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
+; AVX512VBMI2-NEXT: vpsllw %xmm5, %ymm0, %ymm5
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm6 = [16,16,16,16,16,16,16,16]
+; AVX512VBMI2-NEXT: vpsubw %xmm4, %xmm6, %xmm4
+; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
+; AVX512VBMI2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm5, %ymm1
+; AVX512VBMI2-NEXT: vptestnmw %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
@@ -1245,6 +1415,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7]
@@ -1430,6 +1606,27 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VBMI2-NEXT: vpsllvw %zmm5, %zmm6, %zmm5
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VBMI2-NEXT: vpsubb %ymm4, %ymm6, %ymm4
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vporq %zmm1, %zmm5, %zmm1
+; AVX512VBMI2-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512VBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
+; AVX512VBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %ymm2
@@ -1450,6 +1647,26 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VLVBMI2-NEXT: vpsllvw %zmm5, %zmm6, %zmm5
+; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VLVBMI2-NEXT: vpsubb %ymm4, %ymm6, %ymm4
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512VLVBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm5, %zmm1
+; AVX512VLVBMI2-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512VLVBMI2-NEXT: vptestnmb %ymm3, %ymm2, %k1
+; AVX512VLVBMI2-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8
@@ -1537,12 +1754,45 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: constant_funnnel_v4i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: constant_funnnel_v4i64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: constant_funnnel_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: constant_funnnel_v4i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: constant_funnnel_v4i64:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvq {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -1600,12 +1850,45 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: constant_funnnel_v8i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: constant_funnnel_v8i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: constant_funnnel_v8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: constant_funnnel_v8i32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: constant_funnnel_v8i32:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvd {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -1693,6 +1976,19 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
+; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm2, %ymm1
+; AVX512VBMI2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: constant_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %ymm1, %ymm1
@@ -1702,6 +1998,11 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: constant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm1, %xmm2
@@ -1866,6 +2167,18 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
+; AVX512VBMI2-LABEL: constant_funnnel_v32i8:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512VBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
+; AVX512VBMI2-NEXT: vporq %zmm1, %zmm2, %zmm1
+; AVX512VBMI2-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
+; AVX512VBMI2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
; AVX512VLBW-LABEL: constant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
@@ -1880,6 +2193,20 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
+; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
+; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm2, %zmm1
+; AVX512VLVBMI2-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512VLVBMI2-NEXT: movl $16843009, %eax # imm = 0x1010101
+; AVX512VLVBMI2-NEXT: kmovd %eax, %k1
+; AVX512VLVBMI2-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
+;
; XOPAVX1-LABEL: constant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -1942,12 +2269,45 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v4i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlq $50, %ymm1, %ymm1
-; AVX512-NEXT: vpsllq $14, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v4i64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlq $50, %ymm1, %ymm1
+; AVX512F-NEXT: vpsllq $14, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlq $50, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllq $14, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v4i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlq $50, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllq $14, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlq $50, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpsllq $14, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlq $50, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllq $14, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldq $14, %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -1993,12 +2353,45 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v8i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrld $28, %ymm1, %ymm1
-; AVX512-NEXT: vpslld $4, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v8i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrld $28, %ymm1, %ymm1
+; AVX512F-NEXT: vpslld $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrld $28, %ymm1, %ymm1
+; AVX512VL-NEXT: vpslld $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v8i32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrld $28, %ymm1, %ymm1
+; AVX512BW-NEXT: vpslld $4, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrld $28, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpslld $4, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrld $28, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpslld $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldd $4, %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -2044,12 +2437,45 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $9, %ymm1, %ymm1
-; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $9, %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $9, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $9, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: vpsrlw $9, %ymm1, %ymm1
+; AVX512VBMI2-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $9, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
; XOPAVX1: # %bb.0: