diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-11-06 16:29:08 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-11-06 16:29:08 +0000 |
commit | c9467ed31ed4a48309c51a4bdac114970d26eeaf (patch) | |
tree | 0150afd8c23a859fc4ab99b610233a69e9310e23 /llvm/lib | |
parent | 1b468b4e3a9caa5b23e11307b7c671389437b6c6 (diff) | |
download | bcm5719-llvm-c9467ed31ed4a48309c51a4bdac114970d26eeaf.tar.gz bcm5719-llvm-c9467ed31ed4a48309c51a4bdac114970d26eeaf.zip |
[AVX-512] Remove the intrinsics for 128/256-bit masked shifts by a single element held in an xmm register. Instead, auto-upgrade them to the older SSE2/AVX2 intrinsic followed by a select.
llvm-svn: 286070
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 59 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 16 |
2 files changed, 59 insertions, 16 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index b2c65a49a75..9001af5c0ec 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -295,6 +295,22 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "avx512.mask.sub.pd.256" || Name == "avx512.mask.sub.ps.128" || Name == "avx512.mask.sub.ps.256" || + Name == "avx512.mask.psll.d.128" || + Name == "avx512.mask.psll.d.256" || + Name == "avx512.mask.psll.q.128" || + Name == "avx512.mask.psll.q.256" || + Name == "avx512.mask.psll.w.128" || + Name == "avx512.mask.psll.w.256" || + Name == "avx512.mask.psra.d.128" || + Name == "avx512.mask.psra.d.256" || + Name == "avx512.mask.psra.w.128" || + Name == "avx512.mask.psra.w.256" || + Name == "avx512.mask.psrl.d.128" || + Name == "avx512.mask.psrl.d.256" || + Name == "avx512.mask.psrl.q.128" || + Name == "avx512.mask.psrl.q.256" || + Name == "avx512.mask.psrl.w.128" || + Name == "avx512.mask.psrl.w.256" || Name.startswith("sse41.pmovsx") || Name.startswith("sse41.pmovzx") || Name.startswith("avx2.pmovsx") || @@ -669,6 +685,17 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, std::max(NumElts, 8U))); } +// Replace a masked intrinsic with an older unmasked intrinsic. +static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, + Intrinsic::ID IID) { + Function *F = CI.getCalledFunction(); + Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID); + Value *Rep = Builder.CreateCall(Intrin, + { CI.getArgOperand(0), CI.getArgOperand(1) }); + return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); +} + + /// Upgrade a call to an old intrinsic. All argument and return casting must be /// provided to seamlessly integrate with existing context. 
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { @@ -1323,6 +1350,38 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); + } else if (IsX86 && Name == "avx512.mask.psll.d.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_d); + } else if (IsX86 && Name == "avx512.mask.psll.d.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psll_d); + } else if (IsX86 && Name == "avx512.mask.psll.q.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_q); + } else if (IsX86 && Name == "avx512.mask.psll.q.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psll_q); + } else if (IsX86 && Name == "avx512.mask.psll.w.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_w); + } else if (IsX86 && Name == "avx512.mask.psll.w.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psll_w); + } else if (IsX86 && Name == "avx512.mask.psra.d.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psra_d); + } else if (IsX86 && Name == "avx512.mask.psra.d.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psra_d); + } else if (IsX86 && Name == "avx512.mask.psra.w.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psra_w); + } else if (IsX86 && Name == "avx512.mask.psra.w.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psra_w); + } else if (IsX86 && Name == "avx512.mask.psrl.d.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psrl_d); + } else if (IsX86 && Name == "avx512.mask.psrl.d.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psrl_d); + } else if (IsX86 && Name == "avx512.mask.psrl.q.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, 
Intrinsic::x86_sse2_psrl_q); + } else if (IsX86 && Name == "avx512.mask.psrl.q.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psrl_q); + } else if (IsX86 && Name == "avx512.mask.psrl.w.128") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psrl_w); + } else if (IsX86 && Name == "avx512.mask.psrl.w.256") { + Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psrl_w); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index fd8e3971b0a..4ba0d862dc8 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1147,19 +1147,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), - X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), - X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), - X86_INTRINSIC_DATA(avx512_mask_psll_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), - X86_INTRINSIC_DATA(avx512_mask_psll_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx512_mask_psll_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), - 
X86_INTRINSIC_DATA(avx512_mask_psll_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0), @@ -1174,8 +1168,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psllv8_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), - X86_INTRINSIC_DATA(avx512_mask_psra_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), - X86_INTRINSIC_DATA(avx512_mask_psra_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_mask_psra_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psra_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psra_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), @@ -1185,8 +1177,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psra_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psra_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psra_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), - X86_INTRINSIC_DATA(avx512_mask_psra_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), - X86_INTRINSIC_DATA(avx512_mask_psra_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_mask_psra_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_mask_psra_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psra_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0), @@ -1201,19 +1191,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 
0), X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrl_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrl_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx512_mask_psrl_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrl_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0), |