summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author Igor Breger <igor.breger@intel.com> 2016-02-07 08:30:50 +0000
committer Igor Breger <igor.breger@intel.com> 2016-02-07 08:30:50 +0000
commit 0aeda3746443657cb1c0df4da18137e4a49659f5 (patch)
tree e653e9fdefdf589e00aef590dc44264809f2757c /llvm/test
parent 46bafbd0fe5236fb3d17851f296e7b3437929c35 (diff)
download bcm5719-llvm-0aeda3746443657cb1c0df4da18137e4a49659f5.tar.gz
bcm5719-llvm-0aeda3746443657cb1c0df4da18137e4a49659f5.zip
AVX512: VPBROADCASTB/W/D/Q from GPR intrinsics implementation.
Differential Revision: http://reviews.llvm.org/D16813
llvm-svn: 260024
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/avx-isa-check.ll 75
-rw-r--r-- llvm/test/CodeGen/X86/avx512-intrinsics.ll 59
-rw-r--r-- llvm/test/CodeGen/X86/avx512bw-intrinsics.ll 64
-rw-r--r-- llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll 79
-rw-r--r-- llvm/test/CodeGen/X86/avx512vl-intrinsics.ll 79
-rw-r--r-- llvm/test/CodeGen/X86/masked_gather_scatter.ll 3
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-v1.ll 54
7 files changed, 363 insertions, 50 deletions
diff --git a/llvm/test/CodeGen/X86/avx-isa-check.ll b/llvm/test/CodeGen/X86/avx-isa-check.ll
index 7664b7d2093..7a7959d29bb 100644
--- a/llvm/test/CodeGen/X86/avx-isa-check.ll
+++ b/llvm/test/CodeGen/X86/avx-isa-check.ll
@@ -1,5 +1,6 @@
; check AVX2 instructions that are disabled in case avx512VL/avx512BW present
-
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
@@ -576,6 +577,78 @@ entry:
ret <8 x i16> %C
}
+define <32 x i8> @_broadcast32xi8(i8 %a) {
+ %b = insertelement <32 x i8> undef, i8 %a, i32 0
+ %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %c
+}
+
+define <16 x i8> @_broadcast16xi8(i8 %a) {
+ %b = insertelement <16 x i8> undef, i8 %a, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %c
+}
+
+define <16 x i16> @_broadcast16xi16(i16 %a) {
+ %b = insertelement <16 x i16> undef, i16 %a, i32 0
+ %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ ret <16 x i16> %c
+}
+
+define <8 x i16> @_broadcast8xi16(i16 %a) {
+ %b = insertelement <8 x i16> undef, i16 %a, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %c
+}
+
+define <8 x i32> @_broadcast8xi32(i32 %a) {
+ %b = insertelement <8 x i32> undef, i32 %a, i32 0
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ ret <8 x i32> %c
+}
+
+define <4 x i32> @_broadcast4xi32(i32 %a) {
+ %b = insertelement <4 x i32> undef, i32 %a, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @_broadcast4xi64(i64 %a) {
+ %b = insertelement <4 x i64> undef, i64 %a, i64 0
+ %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+ ret <4 x i64> %c
+}
+
+define <2 x i64> @_broadcast2xi64(i64 %a) {
+ %b = insertelement <2 x i64> undef, i64 %a, i64 0
+ %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %c
+}
+
+define <8 x float> @_broadcast8xfloat(float %a) {
+ %b = insertelement <8 x float> undef, float %a, i32 0
+ %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
+ ret <8 x float> %c
+}
+
+define <4 x float> @_broadcast4xfloat(float %a) {
+ %b = insertelement <4 x float> undef, float %a, i32 0
+ %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %c
+}
+
+define <4 x double> @_broadcast4xdouble(double %a) {
+ %b = insertelement <4 x double> undef, double %a, i32 0
+ %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
+ ret <4 x double> %c
+}
+
+define <2 x double> @_broadcast2xdouble(double %a) {
+ %b = insertelement <2 x double> undef, double %a, i32 0
+ %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %c
+}
+
define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
%x = fmul <4 x float> %a0, %a1
%res = fsub <4 x float> %x, %a2
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index c21379f0840..756a3b5e510 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -574,16 +574,6 @@ define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32>
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
-define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
-; CHECK-LABEL: test_x86_pbroadcastd_i32_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastd %edi, %zmm0
-; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
-
define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; CHECK: ## BB#0:
@@ -603,16 +593,6 @@ define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
-define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
-; CHECK-LABEL: test_x86_pbroadcastq_i64_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
-
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
; CHECK-LABEL: test_conflict_d:
; CHECK: ## BB#0:
@@ -7357,6 +7337,45 @@ define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2
ret i8 %res2
}
+define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
+; CHECK-NEXT: vpbroadcastd %edi, %zmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %edi, %zmm2
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res2, %res3
+ ret <16 x i32> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)
+
+define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm2
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
+declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)
+
declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 26a80e3f4a6..5592f7f61ac 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -3516,3 +3516,67 @@ define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32
%res2 = add i32 %res, %res1
ret i32 %res2
}
+
+declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movb {{[0-9]+}}(%esp), %al
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
+ %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
+ %res3 = add <64 x i8> %res, %res1
+ %res4 = add <64 x i8> %res2, %res3
+ ret <64 x i8> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastw %di, %zmm2
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res2, %res3
+ ret <32 x i16> %res4
+}
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 16fc0d51ac3..ad16ba1845d 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -5450,3 +5450,82 @@ define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16
ret i16 %res2
}
+declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %dil, %ymm2
+; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i8> %res, %res1
+ %res4 = add <32 x i8> %res2, %res3
+ ret <32 x i8> %res4
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %dil, %xmm2
+; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i8> %res, %res1
+ %res4 = add <16 x i8> %res2, %res3
+ ret <16 x i8> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %di, %ymm2
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res2, %res3
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %di, %xmm2
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res2, %res3
+ ret <8 x i16> %res4
+}
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 157adeee823..293ebdf915d 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -8236,3 +8236,82 @@ define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2
ret i8 %res2
}
+declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastd %edi, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %edi, %ymm2
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res2, %res3
+ ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastd %edi, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %edi, %xmm2
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res2, %res3
+ ret <4 x i32> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm2
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res2, %res3
+ ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm2
+; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res2, %res3
+ ret <2 x i64> %res4
+}
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 9f8e819cad5..9588467e96d 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -637,8 +637,7 @@ define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) {
; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT: vpbroadcastq %xmm0, %zmm0
-; SKX-NEXT: vmovd %esi, %xmm1
-; SKX-NEXT: vpbroadcastd %xmm1, %ymm1
+; SKX-NEXT: vpbroadcastd %esi, %ymm1
; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
; SKX-NEXT: vpsllq $2, %zmm1, %zmm1
; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
index f9ad5a4cc45..8edc8193189 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -74,13 +74,13 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
-; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movq {{.*}}(%rip), %rax
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -105,14 +105,14 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; AVX512F-NEXT: vpermt2d %zmm1, %zmm3, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movl {{.*}}(%rip), %eax
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vpslld $31, %zmm1, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -163,13 +163,13 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1
-; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movq {{.*}}(%rip), %rax
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -310,12 +310,12 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: movb $51, %al
; AVX512F-NEXT: kmovw %eax, %k2
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movq {{.*}}(%rip), %rax
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm0
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: retq
OpenPOWER on IntegriCloud