From be23a91fcd9870254a8c0f70f8f2cd1b3835f438 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Fri, 1 Feb 2019 14:14:47 +0000
Subject: [InstCombine] try to reduce x86 addcarry to generic uaddo intrinsic

If we can reduce the x86-specific intrinsic to the generic op, it allows
existing simplifications and value tracking folds. AFAICT, this always
results in identical x86 codegen in the non-reduced case...which should be
true because we semi-generically (too aggressively IMO) convert to
llvm.uadd.with.overflow in CGP, so the DAG/isel must already combine/lower
this intrinsic as expected.

This isn't quite what was requested in:
https://bugs.llvm.org/show_bug.cgi?id=40486
...but we want to have these kinds of folds early for efficiency and to
enable greater simplifications. For the case in the bug report where we have:
_addcarry_u64(0, ahi, 0, &ahi)
...this gets completely simplified away in IR.

Differential Revision: https://reviews.llvm.org/D57453

llvm-svn: 352870
---
 .../Transforms/InstCombine/InstCombineCalls.cpp | 33 ++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 7b09fe615f1..80d1144ee1b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -751,6 +751,33 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
   return nullptr;
 }
 
+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder) {
+  Value *CarryIn = II.getArgOperand(0);
+  Value *Op1 = II.getArgOperand(1);
+  Value *Op2 = II.getArgOperand(2);
+  Type *RetTy = II.getType();
+  Type *OpTy = Op1->getType();
+  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+         "Unexpected types for x86 addcarry");
+
+  // If carry-in is zero, this is just an unsigned add with overflow.
+  if (match(CarryIn, m_ZeroInt())) {
+    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+                                          { Op1, Op2 });
+    // The types have to be adjusted to match the x86 call types.
+    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+                                       Builder.getInt8Ty());
+    Value *Res = UndefValue::get(II.getType());
+    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+    return Builder.CreateInsertValue(Res, UAddResult, 1);
+  }
+
+  return nullptr;
+}
+
 static Value *simplifyX86insertps(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
@@ -3109,6 +3136,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return nullptr;
     break;
 
+  case Intrinsic::x86_addcarry_32:
+  case Intrinsic::x86_addcarry_64:
+    if (Value *V = simplifyX86addcarry(*II, Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     // Note that ppc_altivec_vperm has a big-endian bias, so when creating
--
cgit v1.2.3
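
For illustration only (this sketch is not part of the commit or its tests; the
value names and the i64 width are made up): when the carry-in operand is a
constant zero, the new fold rewrites

    %res = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %a, i64 %b)

into the generic overflow intrinsic plus the casts and insertvalues needed to
reproduce the x86 result type, roughly:

    ; generic unsigned add with overflow
    %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
    %sum  = extractvalue { i64, i1 } %uadd, 0
    %ov   = extractvalue { i64, i1 } %uadd, 1
    ; the x86 intrinsic returns the carry-out as i8, so widen the i1 flag
    %ov8  = zext i1 %ov to i8
    %tmp  = insertvalue { i8, i64 } undef, i8 %ov8, 0
    %res  = insertvalue { i8, i64 } %tmp, i64 %sum, 1

From there the existing generic folds take over; for the PR40486 pattern
_addcarry_u64(0, ahi, 0, &ahi), the add of zero and the extracts simplify, so
the whole sequence disappears from the IR.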