author     Craig Topper <craig.topper@gmail.com>    2016-11-06 16:29:08 +0000
committer  Craig Topper <craig.topper@gmail.com>    2016-11-06 16:29:08 +0000
commit     c9467ed31ed4a48309c51a4bdac114970d26eeaf (patch)
tree       0150afd8c23a859fc4ab99b610233a69e9310e23 /llvm/lib
parent     1b468b4e3a9caa5b23e11307b7c671389437b6c6 (diff)
download   bcm5719-llvm-c9467ed31ed4a48309c51a4bdac114970d26eeaf.tar.gz
           bcm5719-llvm-c9467ed31ed4a48309c51a4bdac114970d26eeaf.zip
[AVX-512] Remove intrinsics for 128/256-bit masked shift by single element in xmm. Instead upgrade them to a select and the older SSE/AVX2 intrinsic.
llvm-svn: 286070
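
The semantics of the removed intrinsics can be modeled as the older unmasked shift followed by a lane-wise select on the mask bits, which is the pattern the autoupgrade now emits. Below is a minimal, self-contained C++ sketch of those semantics for the 4 x i32 (psll.d.128) case; it is illustrative only, not part of the patch, and every name in it is made up for the example.

    // Semantics sketch (not part of the patch): a 128-bit masked psll.d is
    // equivalent to the unmasked SSE2 shift followed by a per-lane select.
    // Only the low 4 bits of the i8 mask matter for the 4 x i32 case.
    #include <array>
    #include <cstdint>
    #include <cstdio>

    using V4i32 = std::array<uint32_t, 4>;

    // Unmasked SSE2-style psll.d: every lane is shifted by the count held in
    // the low quadword of the count operand; counts >= 32 zero the lanes.
    static V4i32 psll_d(const V4i32 &a, uint64_t count) {
      V4i32 r{};
      for (int i = 0; i < 4; ++i)
        r[i] = count < 32 ? a[i] << count : 0;
      return r;
    }

    // Masked form, expressed the way the upgrade rewrites it: do the unmasked
    // shift first, then select per lane between the shifted value and the
    // passthru operand based on the mask bit.
    static V4i32 masked_psll_d(const V4i32 &a, uint64_t count,
                               const V4i32 &passthru, uint8_t mask) {
      V4i32 shifted = psll_d(a, count);
      V4i32 r;
      for (int i = 0; i < 4; ++i)
        r[i] = ((mask >> i) & 1) ? shifted[i] : passthru[i];  // the "select"
      return r;
    }

    int main() {
      V4i32 a{1, 2, 3, 4}, passthru{9, 9, 9, 9};
      V4i32 r = masked_psll_d(a, /*count=*/4, passthru, /*mask=*/0b0101);
      for (uint32_t v : r)
        std::printf("%u ", v);  // prints "16 9 48 9"
      std::printf("\n");
    }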
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp              | 59
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h  | 16
2 files changed, 59 insertions, 16 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index b2c65a49a75..9001af5c0ec 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -295,6 +295,22 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "avx512.mask.sub.pd.256" ||
Name == "avx512.mask.sub.ps.128" ||
Name == "avx512.mask.sub.ps.256" ||
+ Name == "avx512.mask.psll.d.128" ||
+ Name == "avx512.mask.psll.d.256" ||
+ Name == "avx512.mask.psll.q.128" ||
+ Name == "avx512.mask.psll.q.256" ||
+ Name == "avx512.mask.psll.w.128" ||
+ Name == "avx512.mask.psll.w.256" ||
+ Name == "avx512.mask.psra.d.128" ||
+ Name == "avx512.mask.psra.d.256" ||
+ Name == "avx512.mask.psra.w.128" ||
+ Name == "avx512.mask.psra.w.256" ||
+ Name == "avx512.mask.psrl.d.128" ||
+ Name == "avx512.mask.psrl.d.256" ||
+ Name == "avx512.mask.psrl.q.128" ||
+ Name == "avx512.mask.psrl.q.256" ||
+ Name == "avx512.mask.psrl.w.128" ||
+ Name == "avx512.mask.psrl.w.256" ||
Name.startswith("sse41.pmovsx") ||
Name.startswith("sse41.pmovzx") ||
Name.startswith("avx2.pmovsx") ||
@@ -669,6 +685,17 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
std::max(NumElts, 8U)));
}
+// Replace a masked intrinsic with an older unmasked intrinsic.
+static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
+ Intrinsic::ID IID) {
+ Function *F = CI.getCalledFunction();
+ Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
+ Value *Rep = Builder.CreateCall(Intrin,
+ { CI.getArgOperand(0), CI.getArgOperand(1) });
+ return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
+}
+
+
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@@ -1323,6 +1350,38 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name == "avx512.mask.psll.d.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_d);
+ } else if (IsX86 && Name == "avx512.mask.psll.d.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psll_d);
+ } else if (IsX86 && Name == "avx512.mask.psll.q.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_q);
+ } else if (IsX86 && Name == "avx512.mask.psll.q.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psll_q);
+ } else if (IsX86 && Name == "avx512.mask.psll.w.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_w);
+ } else if (IsX86 && Name == "avx512.mask.psll.w.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psll_w);
+ } else if (IsX86 && Name == "avx512.mask.psra.d.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psra_d);
+ } else if (IsX86 && Name == "avx512.mask.psra.d.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psra_d);
+ } else if (IsX86 && Name == "avx512.mask.psra.w.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psra_w);
+ } else if (IsX86 && Name == "avx512.mask.psra.w.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psra_w);
+ } else if (IsX86 && Name == "avx512.mask.psrl.d.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psrl_d);
+ } else if (IsX86 && Name == "avx512.mask.psrl.d.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psrl_d);
+ } else if (IsX86 && Name == "avx512.mask.psrl.q.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psrl_q);
+ } else if (IsX86 && Name == "avx512.mask.psrl.q.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psrl_q);
+ } else if (IsX86 && Name == "avx512.mask.psrl.w.128") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psrl_w);
+ } else if (IsX86 && Name == "avx512.mask.psrl.w.256") {
+ Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_avx2_psrl_w);
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index fd8e3971b0a..4ba0d862dc8 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1147,19 +1147,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
@@ -1174,8 +1168,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psllv8_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
@@ -1185,8 +1177,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psra_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
@@ -1201,19 +1191,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),