From 48adedffb7ee3a38ab6f1540fa4277ead8c75a53 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 5 Jul 2016 18:31:17 +0000 Subject: [X86][AVX512] Fixed decoding of permd/permpd variable mask shuffles + enabled them for target shuffle combining Corrected element mask masking to extract the bottom index bits (now matches the perm2 implementation but for unary inputs). llvm-svn: 274571 --- .../X86/vector-shuffle-combining-avx512bw.ll | 49 ++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'llvm/test') diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index 3c09c1cbaef..4a80663f672 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -3,6 +3,9 @@ declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) +declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) +declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8) declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) @@ -10,6 +13,52 @@ declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) +define <8 x double> @combine_permvar_8f64_identity(<8 x double> %x0, <8 x double> %x1) { +; CHECK-LABEL: combine_permvar_8f64_identity: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 -1) + %res1 = call <8 x double> 
@llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 -1) + ret <8 x double> %res1 +} +define <8 x double> @combine_permvar_8f64_identity_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) { +; CHECK-LABEL: combine_permvar_8f64_identity_mask: +; CHECK: # BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0] +; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8] +; CHECK-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq + %res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 %m) + %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 %m) + ret <8 x double> %res1 +} + +define <8 x i64> @combine_permvar_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) { +; CHECK-LABEL: combine_permvar_8i64_identity: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %res0 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x1, i8 -1) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, i8 -1) + ret <8 x i64> %res1 +} +define <8 x i64> @combine_permvar_8i64_identity_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) { +; CHECK-LABEL: combine_permvar_8i64_identity_mask: +; CHECK: # BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0] +; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8] +; CHECK-NEXT: vpermq %zmm1, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq + %res0 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x1, i8 %m) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, i8 %m) + ret <8 x 
i64> %res1 +} + define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) { ; CHECK-LABEL: combine_vpermt2var_8f64_identity: ; CHECK: # BB#0: -- cgit v1.2.3