summaryrefslogtreecommitdiffstats
path: root/llvm/lib/IR/AutoUpgrade.cpp
diff options
context:
space:
mode:
author: Alexander Ivchenko <alexander.ivchenko@intel.com> 2018-04-19 12:13:30 +0000
committer: Alexander Ivchenko <alexander.ivchenko@intel.com> 2018-04-19 12:13:30 +0000
commit: e8fed1546e99e87bcad937f67cff2b5defcbe09c (patch)
tree: 74dc198633392aa19d83f43e7de4fa9bdb3d28cb /llvm/lib/IR/AutoUpgrade.cpp
parent: 9a175bc1bc171a03854c2ed177fbbea7390745a8 (diff)
download: bcm5719-llvm-e8fed1546e99e87bcad937f67cff2b5defcbe09c.tar.gz
bcm5719-llvm-e8fed1546e99e87bcad937f67cff2b5defcbe09c.zip
Lowering x86 adds/addus/subs/subus intrinsics (llvm part)
This is the patch that lowers x86 intrinsics to native IR in order to enable optimizations. The patch also includes folding of previously missing saturation patterns so that IR emits the same machine instructions as the intrinsics.

Patch by tkrupa

Differential Revision: https://reviews.llvm.org/D44785

llvm-svn: 330322
Diffstat (limited to 'llvm/lib/IR/AutoUpgrade.cpp')
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp 106
1 files changed, 104 insertions, 2 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index f3193f5893c..d7d045ffa07 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -84,7 +84,19 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// like to use this information to remove upgrade code for some older
// intrinsics. It is currently undecided how we will determine that future
// point.
- if (Name=="ssse3.pabs.b.128" || // Added in 6.0
+ if (Name.startswith("sse2.padds") || // Added in 7.0
+ Name.startswith("sse2.paddus") || // Added in 7.0
+ Name.startswith("sse2.psubs") || // Added in 7.0
+ Name.startswith("sse2.psubus") || // Added in 7.0
+ Name.startswith("avx2.padds") || // Added in 7.0
+ Name.startswith("avx2.paddus") || // Added in 7.0
+ Name.startswith("avx2.psubs") || // Added in 7.0
+ Name.startswith("avx2.psubus") || // Added in 7.0
+ Name.startswith("avx512.mask.padds") || // Added in 7.0
+ Name.startswith("avx512.mask.paddus") || // Added in 7.0
+ Name.startswith("avx512.mask.psubs") || // Added in 7.0
+ Name.startswith("avx512.mask.psubus") || // Added in 7.0
+ Name=="ssse3.pabs.b.128" || // Added in 6.0
Name=="ssse3.pabs.w.128" || // Added in 6.0
Name=="ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
@@ -845,6 +857,77 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
return EmitX86Select(Builder, Mask, Align, Passthru);
}
+static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
+ bool IsSigned, bool IsAddition) {
+ // Expand a saturating add/sub call into plain IR; operands 0 and 1 are the inputs.
+ Value *Op0 = CI.getArgOperand(0);
+ Value *Op1 = CI.getArgOperand(1);
+
+ // Result type of the call (queried as a vector type) and its element count.
+ Type *ResultType = CI.getType();
+ unsigned NumElts = ResultType->getVectorNumElements();
+
+ Value *Res;
+ if (!IsAddition && !IsSigned) { // Unsigned subtract: umax(Op0, Op1) - Op1, no widening.
+ Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Op1);
+ Value *Select = Builder.CreateSelect(ICmp, Op0, Op1);
+ Res = Builder.CreateSub(Select, Op1);
+ } else {
+ Type *EltType = ResultType->getVectorElementType();
+ Type *ExtEltType = EltType == Builder.getInt8Ty() ? Builder.getInt16Ty()
+ : Builder.getInt32Ty(); // i8 widens to i16; any other element type to i32.
+ Type *ExtVT = VectorType::get(ExtEltType, NumElts);
+ Op0 = IsSigned ? Builder.CreateSExt(Op0, ExtVT)
+ : Builder.CreateZExt(Op0, ExtVT);
+ Op1 = IsSigned ? Builder.CreateSExt(Op1, ExtVT)
+ : Builder.CreateZExt(Op1, ExtVT);
+
+ // Perform addition/subtraction in the extended type.
+ Res = IsAddition ? Builder.CreateAdd(Op0, Op1)
+ : Builder.CreateSub(Op0, Op1);
+
+ // Create a vector holding the maximum value of the unextended element type
+ // (if overflow occurs, the result is saturated to that value).
+ unsigned EltSizeInBits = EltType->getPrimitiveSizeInBits();
+ APInt MaxInt = IsSigned ? APInt::getSignedMaxValue(EltSizeInBits)
+ : APInt::getMaxValue(EltSizeInBits);
+ Value *MaxVec = ConstantInt::get(ResultType, MaxInt);
+ // Extend it so that it can be compared to the result of the add/sub.
+ MaxVec = IsSigned ? Builder.CreateSExt(MaxVec, ExtVT)
+ : Builder.CreateZExt(MaxVec, ExtVT);
+
+ // Saturate overflow: clamp the wide result to the maximum.
+ ICmpInst::Predicate Pred = IsSigned ? ICmpInst::ICMP_SLE
+ : ICmpInst::ICMP_ULE;
+ Value *Cmp = Builder.CreateICmp(Pred, Res,
+ MaxVec); // True where the lane did not overflow.
+ Res = Builder.CreateSelect(Cmp, Res,
+ MaxVec); // Overflowed lanes take the value from MaxVec.
+
+ // Saturate underflow. The only unsigned case in this branch is addition, which needs no lower clamp.
+ if (IsSigned) {
+ APInt MinInt = APInt::getSignedMinValue(EltSizeInBits);
+ Value *MinVec = ConstantInt::get(ResultType, MinInt);
+ // Extend it so that it can be compared to the result of the add/sub.
+ MinVec = Builder.CreateSExt(MinVec, ExtVT);
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Res,
+ MinVec); // True where the lane did not underflow.
+ Res = Builder.CreateSelect(Cmp, Res,
+ MinVec); // Underflowed lanes take the value from MinVec.
+ }
+
+ // Truncate the clamped result back to the original type.
+ Res = Builder.CreateTrunc(Res, ResultType);
+ }
+
+ if (CI.getNumArgOperands() == 4) { // Masked form: operand 2 is the passthru, operand 3 the mask.
+ Value *VecSRC = CI.getArgOperand(2);
+ Value *Mask = CI.getArgOperand(3);
+ Res = EmitX86Select(Builder, Mask, Res, VecSRC);
+ }
+ return Res;
+}
+
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
@@ -1684,6 +1767,26 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
ShuffleMask);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("sse2.padds") ||
+ Name.startswith("avx2.padds") ||
+ Name.startswith("avx512.mask.padds"))) {
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+ true, true); // Signed add.
+ } else if (IsX86 && (Name.startswith("sse2.paddus") ||
+ Name.startswith("avx2.paddus") ||
+ Name.startswith("avx512.mask.paddus"))) {
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+ false, true); // Unsigned add.
+ } else if (IsX86 && (Name.startswith("sse2.psubs") ||
+ Name.startswith("avx2.psubs") ||
+ Name.startswith("avx512.mask.psubs"))) {
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+ true, false); // Signed sub.
+ } else if (IsX86 && (Name.startswith("sse2.psubus") ||
+ Name.startswith("avx2.psubus") ||
+ Name.startswith("avx512.mask.psubus"))) {
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+ false, false); // Unsigned sub.
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
@@ -1694,7 +1797,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
Constant::getNullValue(MaskTy));
-
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
OpenPOWER on IntegriCloud