author     Sanjay Patel <spatel@rotateright.com>  2019-02-01 14:14:47 +0000
committer  Sanjay Patel <spatel@rotateright.com>  2019-02-01 14:14:47 +0000
commit     be23a91fcd9870254a8c0f70f8f2cd1b3835f438 (patch)
tree       b962e265d76d66ba0efe60dc4988e3ad9bcec316 /llvm/lib/Transforms
parent     d9e66e1b44cfdbecde9143c112a93490c8c0223f (diff)
[InstCombine] try to reduce x86 addcarry to generic uaddo intrinsic
If we can reduce the x86-specific intrinsic to the generic op, it allows existing
simplifications and value tracking folds. AFAICT, this always results in identical
x86 codegen in the non-reduced case...which should be true because we semi-generically
(too aggressively IMO) convert to llvm.uadd.with.overflow in CGP, so the DAG/isel must
already combine/lower this intrinsic as expected.

This isn't quite what was requested in:
https://bugs.llvm.org/show_bug.cgi?id=40486
...but we want to have these kinds of folds early for efficiency and to enable greater
simplifications. For the case in the bug report where we have:
_addcarry_u64(0, ahi, 0, &ahi)
...this gets completely simplified away in IR.

Differential Revision: https://reviews.llvm.org/D57453

llvm-svn: 352870
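For illustration only (this snippet is not part of the commit), the source-level pattern from the bug report looks roughly like the C below; the function name is made up, and the intrinsic is assumed to come from <immintrin.h> on Clang/GCC (or <intrin.h> on MSVC):

    #include <immintrin.h>

    // Sketch of the PR40486 pattern: the carry-in and the second addend are
    // both zero, so llvm.x86.addcarry.64 reduces to llvm.uadd.with.overflow,
    // whose overflow bit folds to 0 and whose sum folds back to 'ahi'.
    unsigned long long collapse_hi(unsigned long long ahi) {
      unsigned char carry = _addcarry_u64(0, ahi, 0, &ahi);
      // With this patch plus existing instcombine folds, 'carry' is known to
      // be 0 and 'ahi' is unchanged, so the body reduces to returning the
      // argument; the addcarry call disappears from the IR entirely.
      return ahi + carry;
    }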
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 33
1 file changed, 33 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 7b09fe615f1..80d1144ee1b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -751,6 +751,33 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
   return nullptr;
 }

+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder) {
+  Value *CarryIn = II.getArgOperand(0);
+  Value *Op1 = II.getArgOperand(1);
+  Value *Op2 = II.getArgOperand(2);
+  Type *RetTy = II.getType();
+  Type *OpTy = Op1->getType();
+  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+         "Unexpected types for x86 addcarry");
+
+  // If carry-in is zero, this is just an unsigned add with overflow.
+  if (match(CarryIn, m_ZeroInt())) {
+    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+                                          { Op1, Op2 });
+    // The types have to be adjusted to match the x86 call types.
+    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+                                       Builder.getInt8Ty());
+    Value *Res = UndefValue::get(II.getType());
+    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+    return Builder.CreateInsertValue(Res, UAddResult, 1);
+  }
+
+  return nullptr;
+}
+
 static Value *simplifyX86insertps(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
@@ -3109,6 +3136,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return nullptr;
     break;

+  case Intrinsic::x86_addcarry_32:
+  case Intrinsic::x86_addcarry_64:
+    if (Value *V = simplifyX86addcarry(*II, Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     // Note that ppc_altivec_vperm has a big-endian bias, so when creating