diff options
author | Craig Topper <craig.topper@intel.com> | 2018-05-13 18:03:59 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-05-13 18:03:59 +0000 |
commit | 85906cf0415ad8c740d7bc93d73d89533cfdb308 (patch) | |
tree | 15fc6f0b45fef44d593a0289072d59d9dbec054c /llvm | |
parent | a39c409619f3c06e4febd8d1bb0d5efc7d0a940d (diff) | |
download | bcm5719-llvm-85906cf0415ad8c740d7bc93d73d89533cfdb308.tar.gz bcm5719-llvm-85906cf0415ad8c740d7bc93d73d89533cfdb308.zip |
[X86] Remove and autoupgrade masked vpermd/vpermps intrinsics.
llvm-svn: 332198
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 6 | ||||
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 40 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 40 |
6 files changed, 53 insertions, 59 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index d3b3d1a5e4c..aa8df70e7a2 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -3289,15 +3289,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_permvar_sf_256 : // TODO: Remove this intrinsic - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, - llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_permvar_si_256 : // TODO: Remove this intrinsic - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, - llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 9d603146b00..0a3ecaed6d7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -181,6 +181,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0 + Name == "avx512.mask.permvar.sf.256" || // Added in 7.0 + Name == "avx512.mask.permvar.si.256" || // Added in 7.0 Name == "sse2.pmulu.dq" || // Added in 7.0 Name == "sse41.pmuldq" || // Added in 7.0 Name == "avx2.pmulu.dq" || // Added in 7.0 @@ -1190,19 +1192,23 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, else llvm_unreachable("Unexpected intrinsic"); } else if (Name == "cvtdq2ps.128") { - IID = Intrinsic::x86_sse2_cvtdq2ps; + IID = Intrinsic::x86_sse2_cvtdq2ps; } else if (Name == "cvtdq2ps.256") { - IID = Intrinsic::x86_avx_cvtdq2_ps_256; + IID = Intrinsic::x86_avx_cvtdq2_ps_256; } else if (Name == "cvtpd2dq.256") { - IID = Intrinsic::x86_avx_cvt_pd2dq_256; + IID = Intrinsic::x86_avx_cvt_pd2dq_256; } else if (Name == "cvtpd2ps.256") { - IID = Intrinsic::x86_avx_cvt_pd2_ps_256; + IID = Intrinsic::x86_avx_cvt_pd2_ps_256; } else if (Name == "cvttpd2dq.256") { - IID = Intrinsic::x86_avx_cvtt_pd2dq_256; + IID = Intrinsic::x86_avx_cvtt_pd2dq_256; } else if (Name == "cvttps2dq.128") { - IID = Intrinsic::x86_sse2_cvttps2dq; + IID = Intrinsic::x86_sse2_cvttps2dq; } else if (Name == "cvttps2dq.256") { - IID = Intrinsic::x86_avx_cvtt_ps2dq_256; + IID = Intrinsic::x86_avx_cvtt_ps2dq_256; + } else if (Name == "permvar.sf.256") { + IID = Intrinsic::x86_avx2_permps; + } else if (Name == "permvar.si.256") { + IID = Intrinsic::x86_avx2_permd; } else return false; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 483aafedc82..2078e1f9baa 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -817,12 +817,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_mask_permvar_qi_512, VPERM_2OP_MASK, X86ISD::VPERMV, 0), - X86_INTRINSIC_DATA(avx512_mask_permvar_sf_256, VPERM_2OP_MASK, - X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_mask_permvar_sf_512, VPERM_2OP_MASK, X86ISD::VPERMV, 0), - X86_INTRINSIC_DATA(avx512_mask_permvar_si_256, VPERM_2OP_MASK, - X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index c323bfc3519..ff3fb9387d2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2796,9 +2796,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_mask_permvar_qi_128: case Intrinsic::x86_avx512_mask_permvar_qi_256: case Intrinsic::x86_avx512_mask_permvar_qi_512: - case Intrinsic::x86_avx512_mask_permvar_sf_256: case Intrinsic::x86_avx512_mask_permvar_sf_512: - case Intrinsic::x86_avx512_mask_permvar_si_256: case Intrinsic::x86_avx512_mask_permvar_si_512: if (Value *V = simplifyX86vpermv(*II, Builder)) { // We simplified the permuting, now create a select for the masking. diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 219e93c27ba..f3288a01831 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -6737,3 +6737,43 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i %res2 = add <8 x i32> %res, %res1 ret <8 x i32> %res2 } + +declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) + %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) + %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res3, %res2 + ret <8 x i32> %res4 +} diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index ee06fbb2ba3..69854d2485b 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3075,46 +3075,6 @@ define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64 ret <4 x i64> %res4 } -declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) - -define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8] -; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] -; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0] -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] -; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) - %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) - %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) - %res3 = fadd <8 x float> %res, %res1 - %res4 = fadd <8 x float> %res3, %res2 - ret <8 x float> %res4 -} - -declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) - -define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8] -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) - %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) - %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) - %res3 = add <8 x i32> %res, %res1 - %res4 = add <8 x i32> %res3, %res2 - ret <8 x i32> %res4 -} - declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8) define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { |