diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-12-21 09:04:14 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-12-21 09:04:14 +0000 |
| commit | 5d403f6bf8818415fd0425a89ece8a69fb6da155 (patch) | |
| tree | 2d53184bd5bfb5b71c12ffa033cf3fc8a3193dfa /llvm/lib | |
| parent | f069f1c288cbe1068abf8b15ca8f7f33c8005c7b (diff) | |
| download | bcm5719-llvm-5d403f6bf8818415fd0425a89ece8a69fb6da155.tar.gz bcm5719-llvm-5d403f6bf8818415fd0425a89ece8a69fb6da155.zip | |
[X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to SADD_SAT/SSUB_SAT generic intrinsics (llvm)
This auto-upgrades the signed saturated add/subtract intrinsics (PADDS/PSUBS) for SSE2, AVX2 and AVX512 (including the masked AVX512 variants) to the generic SADD_SAT/SSUB_SAT intrinsics.
Clang counterpart: https://reviews.llvm.org/D55890
Differential Revision: https://reviews.llvm.org/D55894
llvm-svn: 349892
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 58 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 12 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 78 |
3 files changed, 23 insertions, 125 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 7802b2dcd9b..51fef27999b 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -77,10 +77,18 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name == "addcarry.u64" || // Added in 8.0 Name == "subborrow.u32" || // Added in 8.0 Name == "subborrow.u64" || // Added in 8.0 + Name.startswith("sse2.padds.") || // Added in 8.0 + Name.startswith("sse2.psubs.") || // Added in 8.0 Name.startswith("sse2.paddus.") || // Added in 8.0 Name.startswith("sse2.psubus.") || // Added in 8.0 + Name.startswith("avx2.padds.") || // Added in 8.0 + Name.startswith("avx2.psubs.") || // Added in 8.0 Name.startswith("avx2.paddus.") || // Added in 8.0 Name.startswith("avx2.psubus.") || // Added in 8.0 + Name.startswith("avx512.padds.") || // Added in 8.0 + Name.startswith("avx512.psubs.") || // Added in 8.0 + Name.startswith("avx512.mask.padds.") || // Added in 8.0 + Name.startswith("avx512.mask.psubs.") || // Added in 8.0 Name.startswith("avx512.mask.paddus.") || // Added in 8.0 Name.startswith("avx512.mask.psubus.") || // Added in 8.0 Name=="ssse3.pabs.b.128" || // Added in 6.0 @@ -284,8 +292,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 
128/256 in 5.0 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0 - Name.startswith("avx512.mask.padds.") || // Added in 8.0 - Name.startswith("avx512.mask.psubs.") || // Added in 8.0 Name == "sse.cvtsi2ss" || // Added in 7.0 Name == "sse.cvtsi642ss" || // Added in 7.0 Name == "sse2.cvtsi2sd" || // Added in 7.0 @@ -928,12 +934,14 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, } static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, - bool IsAddition) { + bool IsSigned, bool IsAddition) { Type *Ty = CI.getType(); Value *Op0 = CI.getOperand(0); Value *Op1 = CI.getOperand(1); - Intrinsic::ID IID = IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat; + Intrinsic::ID IID = + IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) + : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); Value *Res = Builder.CreateCall(Intrin, {Op0, Op1}); @@ -1380,36 +1388,6 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_vpshrd_w_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("padds.")) { - if (VecWidth == 128 && EltWidth == 8) - IID = Intrinsic::x86_sse2_padds_b; - else if (VecWidth == 256 && EltWidth == 8) - IID = Intrinsic::x86_avx2_padds_b; - else if (VecWidth == 512 && EltWidth == 8) - IID = Intrinsic::x86_avx512_padds_b_512; - else if (VecWidth == 128 && EltWidth == 16) - IID = Intrinsic::x86_sse2_padds_w; - else if (VecWidth == 256 && EltWidth == 16) - IID = Intrinsic::x86_avx2_padds_w; - else if (VecWidth == 512 && EltWidth == 16) - IID = Intrinsic::x86_avx512_padds_w_512; - else - llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("psubs.")) { - if (VecWidth == 128 && EltWidth == 8) - IID = Intrinsic::x86_sse2_psubs_b; - else if (VecWidth == 256 && EltWidth == 8) - IID = Intrinsic::x86_avx2_psubs_b; - else if 
(VecWidth == 512 && EltWidth == 8) - IID = Intrinsic::x86_avx512_psubs_b_512; - else if (VecWidth == 128 && EltWidth == 16) - IID = Intrinsic::x86_sse2_psubs_w; - else if (VecWidth == 256 && EltWidth == 16) - IID = Intrinsic::x86_avx2_psubs_w; - else if (VecWidth == 512 && EltWidth == 16) - IID = Intrinsic::x86_avx512_psubs_w_512; - else - llvm_unreachable("Unexpected intrinsic"); } else return false; @@ -2093,6 +2071,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { if (CI->getNumArgOperands() == 3) Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); + } else if (IsX86 && (Name.startswith("sse2.padds.") || + Name.startswith("sse2.psubs.") || + Name.startswith("avx2.padds.") || + Name.startswith("avx2.psubs.") || + Name.startswith("avx512.padds.") || + Name.startswith("avx512.psubs.") || + Name.startswith("avx512.mask.padds.") || + Name.startswith("avx512.mask.psubs."))) { + bool IsAdd = Name.contains(".padds"); + Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd); } else if (IsX86 && (Name.startswith("sse2.paddus.") || Name.startswith("sse2.psubus.") || Name.startswith("avx2.paddus.") || @@ -2100,7 +2088,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Name.startswith("avx512.mask.paddus.") || Name.startswith("avx512.mask.psubus."))) { bool IsAdd = Name.contains(".paddus"); - Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, IsAdd); + Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd); } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), CI->getArgOperand(1), diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index dc8b3d62330..ab3fea62982 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -319,8 +319,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_packsswb, 
INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0), - X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0), X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0), @@ -361,8 +359,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0), - X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0), X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), @@ -920,8 +916,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0), - X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0), X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0), @@ -980,8 +974,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_psrlv_w_256, 
INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0), - X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), @@ -1144,8 +1136,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0), - X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0), X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0), X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), @@ -1167,8 +1157,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0), - X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0), X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE), X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3e6a4965336..dbbe32b5636 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -248,67 +248,6 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) { return nullptr; } -static Value *simplifyX86AddsSubs(const IntrinsicInst &II, - InstCombiner::BuilderTy &Builder) { - bool IsAddition; - - switch (II.getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::x86_sse2_padds_b: - case Intrinsic::x86_sse2_padds_w: - case Intrinsic::x86_avx2_padds_b: - case Intrinsic::x86_avx2_padds_w: - case Intrinsic::x86_avx512_padds_b_512: - case Intrinsic::x86_avx512_padds_w_512: - IsAddition = true; - break; - case Intrinsic::x86_sse2_psubs_b: - case Intrinsic::x86_sse2_psubs_w: - case Intrinsic::x86_avx2_psubs_b: - case Intrinsic::x86_avx2_psubs_w: - case Intrinsic::x86_avx512_psubs_b_512: - case Intrinsic::x86_avx512_psubs_w_512: - IsAddition = false; - break; - } - - auto *Arg0 = dyn_cast<Constant>(II.getOperand(0)); - auto *Arg1 = dyn_cast<Constant>(II.getOperand(1)); - auto VT = cast<VectorType>(II.getType()); - auto SVT = VT->getElementType(); - unsigned NumElems = VT->getNumElements(); - - if (!Arg0 || !Arg1) - return nullptr; - - SmallVector<Constant *, 64> Result; - - APInt MaxValue = APInt::getSignedMaxValue(SVT->getIntegerBitWidth()); - APInt MinValue = APInt::getSignedMinValue(SVT->getIntegerBitWidth()); - for (unsigned i = 0; i < NumElems; ++i) { - auto *Elt0 = Arg0->getAggregateElement(i); - auto *Elt1 = Arg1->getAggregateElement(i); - if (isa<UndefValue>(Elt0) || isa<UndefValue>(Elt1)) { - Result.push_back(UndefValue::get(SVT)); - continue; - } - - if (!isa<ConstantInt>(Elt0) || !isa<ConstantInt>(Elt1)) - return nullptr; - - const APInt &Val0 = cast<ConstantInt>(Elt0)->getValue(); - const APInt &Val1 = cast<ConstantInt>(Elt1)->getValue(); - bool Overflow = false; - APInt ResultElem = IsAddition ? Val0.sadd_ov(Val1, Overflow) - : Val0.ssub_ov(Val1, Overflow); - if (Overflow) - ResultElem = Val0.isNegative() ? 
MinValue : MaxValue; - Result.push_back(Constant::getIntegerValue(SVT, ResultElem)); - } - - return ConstantVector::get(Result); -} - static Value *simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { bool LogicalShift = false; @@ -2789,23 +2728,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } - // Constant fold add/sub with saturation intrinsics. - case Intrinsic::x86_sse2_padds_b: - case Intrinsic::x86_sse2_padds_w: - case Intrinsic::x86_sse2_psubs_b: - case Intrinsic::x86_sse2_psubs_w: - case Intrinsic::x86_avx2_padds_b: - case Intrinsic::x86_avx2_padds_w: - case Intrinsic::x86_avx2_psubs_b: - case Intrinsic::x86_avx2_psubs_w: - case Intrinsic::x86_avx512_padds_b_512: - case Intrinsic::x86_avx512_padds_w_512: - case Intrinsic::x86_avx512_psubs_b_512: - case Intrinsic::x86_avx512_psubs_w_512: - if (Value *V = simplifyX86AddsSubs(*II, Builder)) - return replaceInstUsesWith(*II, V); - break; - // Constant fold ashr( <A x Bi>, Ci ). // Constant fold lshr( <A x Bi>, Ci ). // Constant fold shl( <A x Bi>, Ci ). |

