summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll24
-rw-r--r--llvm/test/CodeGen/X86/avx512bw-intrinsics.ll56
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll14
3 files changed, 70 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 1c52144a92f..507205ceb4f 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -968,3 +968,27 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1,
ret <32 x i16> %res4
}
+declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm3
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm3
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpaddb %zmm3, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+ %res2 = add <64 x i8> %res, %res1
+ ret <64 x i8> %res2
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 151a1d55753..05b59aff9b5 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1747,29 +1747,57 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
ret <32 x i16> %res2
}
-declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
-define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
-; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
+ ret <64 x i8> %res
+}
+
+define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
-; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
- %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
- %res2 = add <64 x i8> %res, %res1
+ %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
+ %mask.cast = bitcast i64 %mask to <64 x i1>
+ %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
+ ret <64 x i8> %res2
+}
+
+define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
+ %mask.cast = bitcast i64 %mask to <64 x i1>
+ %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
ret <64 x i8> %res2
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index dc9df248a76..c4568eaf722 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -593,9 +593,7 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
; X32: # BB#0:
; X32-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X32-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT: kunpckdq %k0, %k1, %k1
+; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X32-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X32-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
@@ -604,9 +602,9 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
;
; X64-LABEL: combine_pshufb_identity_mask:
; X64: # BB#0:
-; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X64-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X64-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X64-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
@@ -759,9 +757,7 @@ define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) {
; X32-LABEL: combine_pshufb_as_pslldq_mask:
; X32: # BB#0:
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT: kunpckdq %k0, %k1, %k1
+; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X32-NEXT: retl
;
@@ -790,9 +786,7 @@ define <64 x i8> @combine_pshufb_as_psrldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) {
; X32-LABEL: combine_pshufb_as_psrldq_mask:
; X32: # BB#0:
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT: kunpckdq %k0, %k1, %k1
+; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;
OpenPOWER on IntegriCloud