diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-06-21 05:00:56 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-06-21 05:00:56 +0000 |
| commit | 296526bf46f48255f103e97e4970c17a81ebbf08 (patch) | |
| tree | 683c6ad1b4057cf932bfaad04bc02ba8cf6e8d70 /llvm/lib | |
| parent | 433b9761ceccc16047d1f1e889f111d245729919 (diff) | |
| download | bcm5719-llvm-296526bf46f48255f103e97e4970c17a81ebbf08.tar.gz bcm5719-llvm-296526bf46f48255f103e97e4970c17a81ebbf08.zip | |
[X86] Remove masking from 512-bit floating max/min intrinsics. Use select instruction instead.
llvm-svn: 335199
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 44 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 12 |
2 files changed, 36 insertions, 20 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 7aa2685a14e..4a79275feea 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -225,14 +225,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 - Name == "avx512.mask.max.pd.128" || // Added in 5.0 - Name == "avx512.mask.max.pd.256" || // Added in 5.0 - Name == "avx512.mask.max.ps.128" || // Added in 5.0 - Name == "avx512.mask.max.ps.256" || // Added in 5.0 - Name == "avx512.mask.min.pd.128" || // Added in 5.0 - Name == "avx512.mask.min.pd.256" || // Added in 5.0 - Name == "avx512.mask.min.ps.128" || // Added in 5.0 - Name == "avx512.mask.min.ps.256" || // Added in 5.0 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 Name.startswith("avx512.mask.psll.d") || // Added in 4.0 Name.startswith("avx512.mask.psll.q") || // Added in 4.0 @@ -274,10 +266,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0 - Name.startswith("avx512.mask.add.p") || // Added in 7.0 - Name.startswith("avx512.mask.sub.p") || // Added in 7.0 - Name.startswith("avx512.mask.mul.p") || // Added in 7.0 - Name.startswith("avx512.mask.div.p") || // Added in 7.0 + Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0 + Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0 + Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0 + Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0 + Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 + Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0 Name == "sse.cvtsi2ss" || // Added in 7.0 Name == "sse.cvtsi642ss" || // Added in 7.0 Name == "sse2.cvtsi2sd" || // Added in 7.0 @@ -2383,6 +2377,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); + } else if (IsX86 && Name.startswith("avx512.mask.max.p") && + Name.drop_front(18) == ".512") { + Intrinsic::ID IID; + if (Name[17] == 's') + IID = Intrinsic::x86_avx512_max_ps_512; + else + IID = Intrinsic::x86_avx512_max_pd_512; + + Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), + { CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4) }); + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); + } else if (IsX86 && Name.startswith("avx512.mask.min.p") && + Name.drop_front(18) == ".512") { + Intrinsic::ID IID; + if (Name[17] == 's') + IID = Intrinsic::x86_avx512_min_ps_512; + else + IID = Intrinsic::x86_avx512_min_pd_512; + + Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), + { CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4) }); + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 65954f01b81..61fac9fcb0b 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -668,18 +668,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND), X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK, X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND), - X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, - X86ISD::FMAX_RND), - X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, - X86ISD::FMAX_RND), X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK, X86ISD::FMAXS, X86ISD::FMAXS_RND), X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK, X86ISD::FMAXS, X86ISD::FMAXS_RND), - X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN, - X86ISD::FMIN_RND), - X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN, - X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK, X86ISD::FMINS, X86ISD::FMINS_RND), X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK, @@ -1085,6 +1077,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0), X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0), + X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND), + X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND), + X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND), + X86_INTRINSIC_DATA(avx512_min_ps_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mul_pd_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mul_ps_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0), |

