summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-12-21 09:04:14 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-12-21 09:04:14 +0000
commit: 5d403f6bf8818415fd0425a89ece8a69fb6da155 (patch)
tree: 2d53184bd5bfb5b71c12ffa033cf3fc8a3193dfa /llvm/lib
parent: f069f1c288cbe1068abf8b15ca8f7f33c8005c7b (diff)
download: bcm5719-llvm-5d403f6bf8818415fd0425a89ece8a69fb6da155.tar.gz
download: bcm5719-llvm-5d403f6bf8818415fd0425a89ece8a69fb6da155.zip
[X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to SADD_SAT/SSUB_SAT generic intrinsics (llvm)
This auto-upgrades the signed SSE saturated math intrinsics to the SADD_SAT/SSUB_SAT generic intrinsics.

Clang counterpart: https://reviews.llvm.org/D55890

Differential Revision: https://reviews.llvm.org/D55894

llvm-svn: 349892
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp 58
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 12
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 78
3 files changed, 23 insertions, 125 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7802b2dcd9b..51fef27999b 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -77,10 +77,18 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "addcarry.u64" || // Added in 8.0
Name == "subborrow.u32" || // Added in 8.0
Name == "subborrow.u64" || // Added in 8.0
+ Name.startswith("sse2.padds.") || // Added in 8.0
+ Name.startswith("sse2.psubs.") || // Added in 8.0
Name.startswith("sse2.paddus.") || // Added in 8.0
Name.startswith("sse2.psubus.") || // Added in 8.0
+ Name.startswith("avx2.padds.") || // Added in 8.0
+ Name.startswith("avx2.psubs.") || // Added in 8.0
Name.startswith("avx2.paddus.") || // Added in 8.0
Name.startswith("avx2.psubus.") || // Added in 8.0
+ Name.startswith("avx512.padds.") || // Added in 8.0
+ Name.startswith("avx512.psubs.") || // Added in 8.0
+ Name.startswith("avx512.mask.padds.") || // Added in 8.0
+ Name.startswith("avx512.mask.psubs.") || // Added in 8.0
Name.startswith("avx512.mask.paddus.") || // Added in 8.0
Name.startswith("avx512.mask.psubus.") || // Added in 8.0
Name=="ssse3.pabs.b.128" || // Added in 6.0
@@ -284,8 +292,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
- Name.startswith("avx512.mask.padds.") || // Added in 8.0
- Name.startswith("avx512.mask.psubs.") || // Added in 8.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -928,12 +934,14 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
}
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
- bool IsAddition) {
+ bool IsSigned, bool IsAddition) {
Type *Ty = CI.getType();
Value *Op0 = CI.getOperand(0);
Value *Op1 = CI.getOperand(1);
- Intrinsic::ID IID = IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat;
+ Intrinsic::ID IID =
+ IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
+ : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
@@ -1380,36 +1388,6 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_vpshrd_w_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("padds.")) {
- if (VecWidth == 128 && EltWidth == 8)
- IID = Intrinsic::x86_sse2_padds_b;
- else if (VecWidth == 256 && EltWidth == 8)
- IID = Intrinsic::x86_avx2_padds_b;
- else if (VecWidth == 512 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_padds_b_512;
- else if (VecWidth == 128 && EltWidth == 16)
- IID = Intrinsic::x86_sse2_padds_w;
- else if (VecWidth == 256 && EltWidth == 16)
- IID = Intrinsic::x86_avx2_padds_w;
- else if (VecWidth == 512 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_padds_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("psubs.")) {
- if (VecWidth == 128 && EltWidth == 8)
- IID = Intrinsic::x86_sse2_psubs_b;
- else if (VecWidth == 256 && EltWidth == 8)
- IID = Intrinsic::x86_avx2_psubs_b;
- else if (VecWidth == 512 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_psubs_b_512;
- else if (VecWidth == 128 && EltWidth == 16)
- IID = Intrinsic::x86_sse2_psubs_w;
- else if (VecWidth == 256 && EltWidth == 16)
- IID = Intrinsic::x86_avx2_psubs_w;
- else if (VecWidth == 512 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_psubs_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
} else
return false;
@@ -2093,6 +2071,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("sse2.padds.") ||
+ Name.startswith("sse2.psubs.") ||
+ Name.startswith("avx2.padds.") ||
+ Name.startswith("avx2.psubs.") ||
+ Name.startswith("avx512.padds.") ||
+ Name.startswith("avx512.psubs.") ||
+ Name.startswith("avx512.mask.padds.") ||
+ Name.startswith("avx512.mask.psubs."))) {
+ bool IsAdd = Name.contains(".padds");
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
} else if (IsX86 && (Name.startswith("sse2.paddus.") ||
Name.startswith("sse2.psubus.") ||
Name.startswith("avx2.paddus.") ||
@@ -2100,7 +2088,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.paddus.") ||
Name.startswith("avx512.mask.psubus."))) {
bool IsAdd = Name.contains(".paddus");
- Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, IsAdd);
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index dc8b3d62330..ab3fea62982 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -319,8 +319,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
- X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -361,8 +359,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
- X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -920,8 +916,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
- X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -980,8 +974,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
- X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
@@ -1144,8 +1136,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
- X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1167,8 +1157,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
- X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3e6a4965336..dbbe32b5636 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -248,67 +248,6 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return nullptr;
}
-static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- bool IsAddition;
-
- switch (II.getIntrinsicID()) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_sse2_padds_b:
- case Intrinsic::x86_sse2_padds_w:
- case Intrinsic::x86_avx2_padds_b:
- case Intrinsic::x86_avx2_padds_w:
- case Intrinsic::x86_avx512_padds_b_512:
- case Intrinsic::x86_avx512_padds_w_512:
- IsAddition = true;
- break;
- case Intrinsic::x86_sse2_psubs_b:
- case Intrinsic::x86_sse2_psubs_w:
- case Intrinsic::x86_avx2_psubs_b:
- case Intrinsic::x86_avx2_psubs_w:
- case Intrinsic::x86_avx512_psubs_b_512:
- case Intrinsic::x86_avx512_psubs_w_512:
- IsAddition = false;
- break;
- }
-
- auto *Arg0 = dyn_cast<Constant>(II.getOperand(0));
- auto *Arg1 = dyn_cast<Constant>(II.getOperand(1));
- auto VT = cast<VectorType>(II.getType());
- auto SVT = VT->getElementType();
- unsigned NumElems = VT->getNumElements();
-
- if (!Arg0 || !Arg1)
- return nullptr;
-
- SmallVector<Constant *, 64> Result;
-
- APInt MaxValue = APInt::getSignedMaxValue(SVT->getIntegerBitWidth());
- APInt MinValue = APInt::getSignedMinValue(SVT->getIntegerBitWidth());
- for (unsigned i = 0; i < NumElems; ++i) {
- auto *Elt0 = Arg0->getAggregateElement(i);
- auto *Elt1 = Arg1->getAggregateElement(i);
- if (isa<UndefValue>(Elt0) || isa<UndefValue>(Elt1)) {
- Result.push_back(UndefValue::get(SVT));
- continue;
- }
-
- if (!isa<ConstantInt>(Elt0) || !isa<ConstantInt>(Elt1))
- return nullptr;
-
- const APInt &Val0 = cast<ConstantInt>(Elt0)->getValue();
- const APInt &Val1 = cast<ConstantInt>(Elt1)->getValue();
- bool Overflow = false;
- APInt ResultElem = IsAddition ? Val0.sadd_ov(Val1, Overflow)
- : Val0.ssub_ov(Val1, Overflow);
- if (Overflow)
- ResultElem = Val0.isNegative() ? MinValue : MaxValue;
- Result.push_back(Constant::getIntegerValue(SVT, ResultElem));
- }
-
- return ConstantVector::get(Result);
-}
-
static Value *simplifyX86immShift(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
bool LogicalShift = false;
@@ -2789,23 +2728,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- // Constant fold add/sub with saturation intrinsics.
- case Intrinsic::x86_sse2_padds_b:
- case Intrinsic::x86_sse2_padds_w:
- case Intrinsic::x86_sse2_psubs_b:
- case Intrinsic::x86_sse2_psubs_w:
- case Intrinsic::x86_avx2_padds_b:
- case Intrinsic::x86_avx2_padds_w:
- case Intrinsic::x86_avx2_psubs_b:
- case Intrinsic::x86_avx2_psubs_w:
- case Intrinsic::x86_avx512_padds_b_512:
- case Intrinsic::x86_avx512_padds_w_512:
- case Intrinsic::x86_avx512_psubs_b_512:
- case Intrinsic::x86_avx512_psubs_w_512:
- if (Value *V = simplifyX86AddsSubs(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).
// Constant fold shl( <A x Bi>, Ci ).
OpenPOWER on IntegriCloud