diff options
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 114 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 1 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/narrow-math.ll | 60 |
3 files changed, 74 insertions, 101 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 73328666c09..c0fd14b2644 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1032,6 +1032,47 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I, return BinaryOperator::CreateNot(NotMask, I.getName()); } +/// Try to narrow the width of an 'add' if at least 1 operand is an extend +/// of a value. This requires a potentially expensive known bits check to make +/// sure the narrow op does not overflow. +Instruction *InstCombiner::narrowAddIfNoOverflow(BinaryOperator &I) { + // We need at least one extended operand. + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *X; + bool IsSext = match(LHS, m_SExt(m_Value(X))); + if (!IsSext && !match(LHS, m_ZExt(m_Value(X)))) + return nullptr; + + // If both operands are the same extension from the same source type and we + // can eliminate at least one (hasOneUse), this might work. + CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt; + Value *Y; + if (!(match(RHS, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() && + cast<Operator>(RHS)->getOpcode() == CastOpc && + (LHS->hasOneUse() || RHS->hasOneUse()))) { + // If that did not match, see if the RHS is a constant. Truncating and + // extending must produce the same constant. + Constant *WideC; + if (!LHS->hasOneUse() || !match(RHS, m_Constant(WideC))) + return nullptr; + Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType()); + if (ConstantExpr::getCast(CastOpc, NarrowC, I.getType()) != WideC) + return nullptr; + Y = NarrowC; + } + // Both operands have narrow versions. Last step: the math must not overflow + // in the narrow width. + bool WillNotOverflow = IsSext ? 
willNotOverflowSignedAdd(X, Y, I) + : willNotOverflowUnsignedAdd(X, Y, I); + if (!WillNotOverflow) + return nullptr; + + // add (ext X), (ext Y) --> ext (add X, Y) + // add (ext X), C --> ext (add X, C') + Value *NarrowAdd = Builder.CreateAdd(X, Y, "narrow", !IsSext, IsSext); + return CastInst::Create(CastOpc, NarrowAdd, I.getType()); +} + Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), @@ -1191,77 +1232,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - // Check for (add (sext x), y), see if we can merge this into an - // integer add followed by a sext. - if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { - // (add (sext x), cst) --> (sext (add x, cst')) - if (auto *RHSC = dyn_cast<Constant>(RHS)) { - if (LHSConv->hasOneUse()) { - Constant *CI = - ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); - if (ConstantExpr::getSExt(CI, Ty) == RHSC && - willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { - // Insert the new, smaller add. - Value *NewAdd = - Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); - return new SExtInst(NewAdd, Ty); - } - } - } - - // (add (sext x), (sext y)) --> (sext (add int x, y)) - if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { - // Only do this if x/y have the same type, if at least one of them has a - // single use (so we don't increase the number of sexts), and if the - // integer add will not overflow. - if (LHSConv->getOperand(0)->getType() == - RHSConv->getOperand(0)->getType() && - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - willNotOverflowSignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), I)) { - // Insert the new integer add. 
- Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), "addconv"); - return new SExtInst(NewAdd, Ty); - } - } - } - - // Check for (add (zext x), y), see if we can merge this into an - // integer add followed by a zext. - if (auto *LHSConv = dyn_cast<ZExtInst>(LHS)) { - // (add (zext x), cst) --> (zext (add x, cst')) - if (auto *RHSC = dyn_cast<Constant>(RHS)) { - if (LHSConv->hasOneUse()) { - Constant *CI = - ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); - if (ConstantExpr::getZExt(CI, Ty) == RHSC && - willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) { - // Insert the new, smaller add. - Value *NewAdd = - Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); - return new ZExtInst(NewAdd, Ty); - } - } - } - - // (add (zext x), (zext y)) --> (zext (add int x, y)) - if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) { - // Only do this if x/y have the same type, if at least one of them has a - // single use (so we don't increase the number of zexts), and if the - // integer add will not overflow. - if (LHSConv->getOperand(0)->getType() == - RHSConv->getOperand(0)->getType() && - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - willNotOverflowUnsignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), I)) { - // Insert the new integer add. 
- Value *NewAdd = Builder.CreateNUWAdd( - LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); - return new ZExtInst(NewAdd, Ty); - } - } - } + if (Instruction *Ext = narrowAddIfNoOverflow(I)) + return Ext; // (add (xor A, B) (and A, B)) --> (or A, B) // (add (and A, B) (xor A, B)) --> (or A, B) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 4d9a53c33f6..228a61672f7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -526,6 +526,7 @@ private: Instruction *foldCastedBitwiseLogic(BinaryOperator &I); Instruction *narrowBinOp(TruncInst &Trunc); Instruction *narrowMaskedBinOp(BinaryOperator &And); + Instruction *narrowAddIfNoOverflow(BinaryOperator &I); Instruction *narrowRotate(TruncInst &Trunc); Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN); diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll index d0b4dbb3d13..5badaa61b72 100644 --- a/llvm/test/Transforms/InstCombine/narrow-math.ll +++ b/llvm/test/Transforms/InstCombine/narrow-math.ll @@ -9,8 +9,8 @@ define i64 @sext_sext_add(i32 %A) { ; CHECK-LABEL: @sext_sext_add( ; CHECK-NEXT: [[B:%.*]] = ashr i32 [[A:%.*]], 7 ; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9 -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[B]], [[C]] -; CHECK-NEXT: [[F:%.*]] = sext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]] +; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[F]] ; %B = ashr i32 %A, 7 @@ -65,8 +65,8 @@ define i64 @sext_sext_add_extra_use1(i32 %A) { ; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9 ; CHECK-NEXT: [[D:%.*]] = sext i32 [[B]] to i64 ; CHECK-NEXT: call void @use(i64 [[D]]) -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[B]], [[C]] -; CHECK-NEXT: [[F:%.*]] = sext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], 
[[C]] +; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[F]] ; %B = ashr i32 %A, 7 @@ -84,8 +84,8 @@ define i64 @sext_sext_add_extra_use2(i32 %A) { ; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9 ; CHECK-NEXT: [[E:%.*]] = sext i32 [[C]] to i64 ; CHECK-NEXT: call void @use(i64 [[E]]) -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[B]], [[C]] -; CHECK-NEXT: [[F:%.*]] = sext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]] +; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[F]] ; %B = ashr i32 %A, 7 @@ -124,8 +124,8 @@ define i64 @test1(i32 %V) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0 ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0 -; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[CALL1]], [[CALL2]] -; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[CALL1]], [[CALL2]] +; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[ADD]] ; %call1 = call i32 @callee(), !range !0 @@ -185,8 +185,8 @@ define i64 @test4(i32 %V) { define i64 @test5(i32 %V) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1 -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[ASHR]], 1073741823 -; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[ASHR]], 1073741823 +; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[ADD]] ; %ashr = ashr i32 %V, 1 @@ -215,8 +215,8 @@ define i64 @sext_add_constant_extra_use(i32 %V) { define <2 x i64> @test5_splat(<2 x i32> %V) { ; CHECK-LABEL: @test5_splat( ; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1> -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1073741823, i32 1073741823> -; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1073741823, 
i32 1073741823> +; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; %ashr = ashr <2 x i32> %V, <i32 1, i32 1> @@ -228,8 +228,8 @@ define <2 x i64> @test5_splat(<2 x i32> %V) { define <2 x i64> @test5_vec(<2 x i32> %V) { ; CHECK-LABEL: @test5_vec( ; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1> -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1, i32 2> -; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1, i32 2> +; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; %ashr = ashr <2 x i32> %V, <i32 1, i32 1> @@ -241,8 +241,8 @@ define <2 x i64> @test5_vec(<2 x i32> %V) { define i64 @test6(i32 %V) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1 -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[ASHR]], -1073741824 -; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[ASHR]], -1073741824 +; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[ADD]] ; %ashr = ashr i32 %V, 1 @@ -254,8 +254,8 @@ define i64 @test6(i32 %V) { define <2 x i64> @test6_splat(<2 x i32> %V) { ; CHECK-LABEL: @test6_splat( ; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1> -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1073741824, i32 -1073741824> -; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1073741824, i32 -1073741824> +; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; %ashr = ashr <2 x i32> %V, <i32 1, i32 1> @@ -267,8 +267,8 @@ define <2 x i64> @test6_splat(<2 x i32> %V) { define <2 x i64> @test6_vec(<2 x i32> %V) { ; CHECK-LABEL: @test6_vec( ; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], 
<i32 1, i32 1> -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 -2> -; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 -2> +; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; %ashr = ashr <2 x i32> %V, <i32 1, i32 1> @@ -280,8 +280,8 @@ define <2 x i64> @test6_vec(<2 x i32> %V) { define <2 x i64> @test6_vec2(<2 x i32> %V) { ; CHECK-LABEL: @test6_vec2( ; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1> -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 1> -; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 1> +; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; %ashr = ashr <2 x i32> %V, <i32 1, i32 1> @@ -293,8 +293,8 @@ define <2 x i64> @test6_vec2(<2 x i32> %V) { define i64 @test7(i32 %V) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[V:%.*]], 1 -; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw i32 [[LSHR]], 2147483647 -; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nuw i32 [[LSHR]], 2147483647 +; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[ADD]] ; %lshr = lshr i32 %V, 1 @@ -306,8 +306,8 @@ define i64 @test7(i32 %V) { define <2 x i64> @test7_splat(<2 x i32> %V) { ; CHECK-LABEL: @test7_splat( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1> -; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 2147483647, i32 2147483647> -; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[ADDCONV]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 2147483647, i32 2147483647> +; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; %lshr = lshr 
<2 x i32> %V, <i32 1, i32 1> @@ -319,8 +319,8 @@ define <2 x i64> @test7_splat(<2 x i32> %V) { define <2 x i64> @test7_vec(<2 x i32> %V) { ; CHECK-LABEL: @test7_vec( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1> -; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 1, i32 2> -; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[ADDCONV]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 1, i32 2> +; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; %lshr = lshr <2 x i32> %V, <i32 1, i32 1> @@ -463,8 +463,8 @@ define i64 @test11(i32 %V) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !1 ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !1 -; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] -; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[ADDCONV]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] +; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[ADD]] ; %call1 = call i32 @callee(), !range !1 |

