diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-10-18 05:44:04 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-10-18 05:44:04 +0000 |
| commit | 72b9f9864f15b72e61095e7524c8e6c3fb7742c4 (patch) | |
| tree | 6c3ceb63b93461c5e2619c758fb758efd972529e /llvm/test/CodeGen/X86 | |
| parent | b7c9deb5870358b9a7a655053e27d20b5c6ba442 (diff) | |
| download | bcm5719-llvm-72b9f9864f15b72e61095e7524c8e6c3fb7742c4.tar.gz bcm5719-llvm-72b9f9864f15b72e61095e7524c8e6c3fb7742c4.zip | |
[AVX-512] Add test case to check shuffle decoding for masked vpermilps for r284450.
This is harder to do for vpermilpd as shuffle combining turns the constant vector into an immediate since all vpermilpd's inputs with constant vector can also be encoded with the immediate form.
llvm-svn: 284455
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index fb0af943555..79d4094b01e 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -4510,6 +4510,25 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, ret <16 x float> %res4 } +; Test case to make sure we can print shuffle decode comments for constant pool loads. +define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15] +; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] +; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1 +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res2, %res3 + ret <16 x float> %res4 +} + declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16) define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) { |

