| author | Craig Topper <craig.topper@gmail.com> | 2016-12-10 23:09:43 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-12-10 23:09:43 +0000 |
| commit | edab02b50b6b3b5e4f785adaacd108cd921d1f3f (patch) | |
| tree | c285ba3f53a4f648eb0f66e095655f89332f5ccf /llvm/test/CodeGen | |
| parent | 4c48bbe94da7d0c6b949631b5ecdc2ff045960e1 (diff) | |
[X86] Remove masking from the 512-bit PSHUFB intrinsics in preparation for constant folding them in InstCombineCalls, as we already do for the 128/256-bit versions.
llvm-svn: 289344
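In IR terms, the change rewrites the old masked intrinsic into the new unmasked one plus an explicit bitcast + select on the mask bits, which is exactly the pattern the updated tests below check for (operand names here are illustrative):

  ; Before: passthrough vector and mask are baked into the intrinsic.
  %r = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %passthru, i64 %m)

  ; After: unmasked intrinsic; masking becomes generic IR the optimizer understands.
  %s = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %m.vec = bitcast i64 %m to <64 x i1>
  %r2 = select <64 x i1> %m.vec, <64 x i8> %s, <64 x i8> %passthru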
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll | 24 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 56 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll | 14 |
3 files changed, 70 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 1c52144a92f..507205ceb4f 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -968,3 +968,27 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1,
   ret <32 x i16> %res4
 }
+declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm3
+; AVX512BW-NEXT:    kmovq %rdi, %k1
+; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT:    vpaddb %zmm3, %zmm2, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm3
+; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpaddb %zmm3, %zmm2, %zmm0
+; AVX512F-32-NEXT:    retl
+  %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+  %res2 = add <64 x i8> %res, %res1
+  ret <64 x i8> %res2
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 151a1d55753..05b59aff9b5 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1747,29 +1747,57 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
   ret <32 x i16> %res2
 }
 
-declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
 
-define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
-; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    retl
+  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
+  ret <64 x i8> %res
+}
+
+define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
 ; AVX512BW:       ## BB#0:
 ; AVX512BW-NEXT:    kmovq %rdi, %k1
 ; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
 ; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
-; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
-; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; AVX512F-32-NEXT:    retl
-  %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
-  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
-  %res2 = add <64 x i8> %res, %res1
+  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
+  %mask.cast = bitcast i64 %mask to <64 x i1>
+  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
+  ret <64 x i8> %res2
+}
+
+define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    kmovq %rdi, %k1
+; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT:    retl
+  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
+  %mask.cast = bitcast i64 %mask to <64 x i1>
+  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
   ret <64 x i8> %res2
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index dc9df248a76..c4568eaf722 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -593,9 +593,7 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
 ; X32:       # BB#0:
 ; X32-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; X32-NEXT:    vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT:    kunpckdq %k0, %k1, %k1
+; X32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
 ; X32-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3
 ; X32-NEXT:    vpshufb %zmm2, %zmm0, %zmm3 {%k1}
 ; X32-NEXT:    vpshufb %zmm2, %zmm3, %zmm1 {%k1}
@@ -604,9 +602,9 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
 ;
 ; X64-LABEL: combine_pshufb_identity_mask:
 ; X64:       # BB#0:
-; X64-NEXT:    kmovq %rdi, %k1
 ; X64-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; X64-NEXT:    vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X64-NEXT:    kmovq %rdi, %k1
 ; X64-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3
 ; X64-NEXT:    vpshufb %zmm2, %zmm0, %zmm3 {%k1}
 ; X64-NEXT:    vpshufb %zmm2, %zmm3, %zmm1 {%k1}
@@ -759,9 +757,7 @@ define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) {
 define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) {
 ; X32-LABEL: combine_pshufb_as_pslldq_mask:
 ; X32:       # BB#0:
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT:    kunpckdq %k0, %k1, %k1
+; X32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
 ; X32-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
 ; X32-NEXT:    retl
 ;
@@ -790,9 +786,7 @@ define <64 x i8> @combine_pshufb_as_psrldq(<64 x i8> %a0) {
 define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) {
 ; X32-LABEL: combine_pshufb_as_psrldq_mask:
 ; X32:       # BB#0:
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT:    kunpckdq %k0, %k1, %k1
+; X32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
 ; X32-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; X32-NEXT:    retl
 ;
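For context, the fold this unblocks mirrors what InstCombineCalls already does for the 128/256-bit PSHUFB intrinsics: when the shuffle-control operand is constant, the call can be replaced by a target-independent shufflevector, since each output byte takes byte (c & 15) from its own 16-byte lane, or zero when the control byte's sign bit is set. A minimal sketch of the existing 128-bit fold (the function name is made up for illustration):

define <16 x i8> @fold_example(<16 x i8> %v) {
  ; Constant control vector: reverse the 16 bytes; no sign bits are set,
  ; so no lanes are zeroed.
  %r = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %v, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
  ret <16 x i8> %r
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)

; After `opt -instcombine`, the call is expected to become roughly:
;   %r = shufflevector <16 x i8> %v, <16 x i8> undef,
;        <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
;                    i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>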

