-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp  114
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h    1
-rw-r--r--  llvm/test/Transforms/InstCombine/narrow-math.ll          60
3 files changed, 74 insertions, 101 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 73328666c09..c0fd14b2644 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1032,6 +1032,47 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
   return BinaryOperator::CreateNot(NotMask, I.getName());
 }
 
+/// Try to narrow the width of an 'add' if at least 1 operand is an extend of
+/// a value. This requires a potentially expensive known bits check to make
+/// sure the narrow op does not overflow.
+Instruction *InstCombiner::narrowAddIfNoOverflow(BinaryOperator &I) {
+  // We need at least one extended operand.
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  Value *X;
+  bool IsSext = match(LHS, m_SExt(m_Value(X)));
+  if (!IsSext && !match(LHS, m_ZExt(m_Value(X))))
+    return nullptr;
+
+  // If both operands are the same extension from the same source type and we
+  // can eliminate at least one (hasOneUse), this might work.
+  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
+  Value *Y;
+  if (!(match(RHS, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
+        cast<Operator>(RHS)->getOpcode() == CastOpc &&
+        (LHS->hasOneUse() || RHS->hasOneUse()))) {
+    // If that did not match, see if the RHS is a constant. Truncating and
+    // extending must produce the same constant.
+    Constant *WideC;
+    if (!LHS->hasOneUse() || !match(RHS, m_Constant(WideC)))
+      return nullptr;
+    Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
+    if (ConstantExpr::getCast(CastOpc, NarrowC, I.getType()) != WideC)
+      return nullptr;
+    Y = NarrowC;
+  }
+  // Both operands have narrow versions. Last step: the math must not overflow
+  // in the narrow width.
+  bool WillNotOverflow = IsSext ? willNotOverflowSignedAdd(X, Y, I)
+                                : willNotOverflowUnsignedAdd(X, Y, I);
+  if (!WillNotOverflow)
+    return nullptr;
+
+  // add (ext X), (ext Y) --> ext (add X, Y)
+  // add (ext X), C --> ext (add X, C')
+  Value *NarrowAdd = Builder.CreateAdd(X, Y, "narrow", /*HasNUW=*/!IsSext,
+                                       /*HasNSW=*/IsSext);
+  return CastInst::Create(CastOpc, NarrowAdd, I.getType());
+}
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
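
To illustrate the fold added above, here is a minimal LLVM IR sketch mirroring the sext_sext_add test updated below (the function name @sketch is illustrative, not part of the patch). The two ashr operations shrink the known ranges enough that willNotOverflowSignedAdd can prove the 32-bit add never wraps:

define i64 @sketch(i32 %A) {
  %B = ashr i32 %A, 7
  %C = ashr i32 %A, 9
  %D = sext i32 %B to i64
  %E = sext i32 %C to i64
  %F = add i64 %D, %E
  ret i64 %F
}

; After instcombine, the wide add becomes one narrow nsw add and one sext:
;   %narrow = add nsw i32 %B, %C
;   %F = sext i32 %narrow to i64
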
@@ -1191,77 +1232,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     }
   }
 
-  // Check for (add (sext x), y), see if we can merge this into an
-  // integer add followed by a sext.
-  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
-    // (add (sext x), cst) --> (sext (add x, cst'))
-    if (auto *RHSC = dyn_cast<Constant>(RHS)) {
-      if (LHSConv->hasOneUse()) {
-        Constant *CI =
-            ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
-        if (ConstantExpr::getSExt(CI, Ty) == RHSC &&
-            willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
-          // Insert the new, smaller add.
-          Value *NewAdd =
-              Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
-          return new SExtInst(NewAdd, Ty);
-        }
-      }
-    }
-
-    // (add (sext x), (sext y)) --> (sext (add int x, y))
-    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
-      // Only do this if x/y have the same type, if at least one of them has a
-      // single use (so we don't increase the number of sexts), and if the
-      // integer add will not overflow.
-      if (LHSConv->getOperand(0)->getType() ==
-              RHSConv->getOperand(0)->getType() &&
-          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
-          willNotOverflowSignedAdd(LHSConv->getOperand(0),
-                                   RHSConv->getOperand(0), I)) {
-        // Insert the new integer add.
-        Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0),
-                                             RHSConv->getOperand(0), "addconv");
-        return new SExtInst(NewAdd, Ty);
-      }
-    }
-  }
-
-  // Check for (add (zext x), y), see if we can merge this into an
-  // integer add followed by a zext.
-  if (auto *LHSConv = dyn_cast<ZExtInst>(LHS)) {
-    // (add (zext x), cst) --> (zext (add x, cst'))
-    if (auto *RHSC = dyn_cast<Constant>(RHS)) {
-      if (LHSConv->hasOneUse()) {
-        Constant *CI =
-            ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
-        if (ConstantExpr::getZExt(CI, Ty) == RHSC &&
-            willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) {
-          // Insert the new, smaller add.
-          Value *NewAdd =
-              Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
-          return new ZExtInst(NewAdd, Ty);
-        }
-      }
-    }
-
-    // (add (zext x), (zext y)) --> (zext (add int x, y))
-    if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) {
-      // Only do this if x/y have the same type, if at least one of them has a
-      // single use (so we don't increase the number of zexts), and if the
-      // integer add will not overflow.
-      if (LHSConv->getOperand(0)->getType() ==
-              RHSConv->getOperand(0)->getType() &&
-          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
-          willNotOverflowUnsignedAdd(LHSConv->getOperand(0),
-                                     RHSConv->getOperand(0), I)) {
-        // Insert the new integer add.
-        Value *NewAdd = Builder.CreateNUWAdd(
-            LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv");
-        return new ZExtInst(NewAdd, Ty);
-      }
-    }
-  }
+  if (Instruction *Ext = narrowAddIfNoOverflow(I))
+    return Ext;
 
   // (add (xor A, B) (and A, B)) --> (or A, B)
   // (add (and A, B) (xor A, B)) --> (or A, B)
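
The constant-operand path of narrowAddIfNoOverflow can be sketched the same way, mirroring test5 below (the function name @sketch_const is illustrative). The wide constant 1073741823 survives the trunc-to-i32 and sext-back round trip required by the code above, and the ashr keeps the narrow sum inside signed i32 range:

define i64 @sketch_const(i32 %V) {
  %ashr = ashr i32 %V, 1
  %sext = sext i32 %ashr to i64
  %add = add i64 %sext, 1073741823
  ret i64 %add
}

; After instcombine:
;   %narrow = add nsw i32 %ashr, 1073741823
;   %add = sext i32 %narrow to i64
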
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 4d9a53c33f6..228a61672f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -526,6 +526,7 @@ private:
   Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
   Instruction *narrowBinOp(TruncInst &Trunc);
   Instruction *narrowMaskedBinOp(BinaryOperator &And);
+  Instruction *narrowAddIfNoOverflow(BinaryOperator &I);
   Instruction *narrowRotate(TruncInst &Trunc);
   Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll
index d0b4dbb3d13..5badaa61b72 100644
--- a/llvm/test/Transforms/InstCombine/narrow-math.ll
+++ b/llvm/test/Transforms/InstCombine/narrow-math.ll
@@ -9,8 +9,8 @@ define i64 @sext_sext_add(i32 %A) {
; CHECK-LABEL: @sext_sext_add(
; CHECK-NEXT: [[B:%.*]] = ashr i32 [[A:%.*]], 7
; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[B]], [[C]]
-; CHECK-NEXT: [[F:%.*]] = sext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]]
+; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[F]]
;
%B = ashr i32 %A, 7
@@ -65,8 +65,8 @@ define i64 @sext_sext_add_extra_use1(i32 %A) {
; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9
; CHECK-NEXT: [[D:%.*]] = sext i32 [[B]] to i64
; CHECK-NEXT: call void @use(i64 [[D]])
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[B]], [[C]]
-; CHECK-NEXT: [[F:%.*]] = sext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]]
+; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[F]]
;
%B = ashr i32 %A, 7
@@ -84,8 +84,8 @@ define i64 @sext_sext_add_extra_use2(i32 %A) {
; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9
; CHECK-NEXT: [[E:%.*]] = sext i32 [[C]] to i64
; CHECK-NEXT: call void @use(i64 [[E]])
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[B]], [[C]]
-; CHECK-NEXT: [[F:%.*]] = sext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]]
+; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[F]]
;
%B = ashr i32 %A, 7
@@ -124,8 +124,8 @@ define i64 @test1(i32 %V) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[CALL1]], [[CALL2]]
-; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[ADD]]
;
%call1 = call i32 @callee(), !range !0
@@ -185,8 +185,8 @@ define i64 @test4(i32 %V) {
define i64 @test5(i32 %V) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[ASHR]], 1073741823
-; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[ASHR]], 1073741823
+; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[ADD]]
;
%ashr = ashr i32 %V, 1
@@ -215,8 +215,8 @@ define i64 @sext_add_constant_extra_use(i32 %V) {
define <2 x i64> @test5_splat(<2 x i32> %V) {
; CHECK-LABEL: @test5_splat(
; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1073741823, i32 1073741823>
-; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ADD]]
;
%ashr = ashr <2 x i32> %V, <i32 1, i32 1>
@@ -228,8 +228,8 @@ define <2 x i64> @test5_splat(<2 x i32> %V) {
define <2 x i64> @test5_vec(<2 x i32> %V) {
; CHECK-LABEL: @test5_vec(
; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1, i32 2>
-; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1, i32 2>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ADD]]
;
%ashr = ashr <2 x i32> %V, <i32 1, i32 1>
@@ -241,8 +241,8 @@ define <2 x i64> @test5_vec(<2 x i32> %V) {
define i64 @test6(i32 %V) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[ASHR]], -1073741824
-; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[ASHR]], -1073741824
+; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[ADD]]
;
%ashr = ashr i32 %V, 1
@@ -254,8 +254,8 @@ define i64 @test6(i32 %V) {
define <2 x i64> @test6_splat(<2 x i32> %V) {
; CHECK-LABEL: @test6_splat(
; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1073741824, i32 -1073741824>
-; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1073741824, i32 -1073741824>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ADD]]
;
%ashr = ashr <2 x i32> %V, <i32 1, i32 1>
@@ -267,8 +267,8 @@ define <2 x i64> @test6_splat(<2 x i32> %V) {
define <2 x i64> @test6_vec(<2 x i32> %V) {
; CHECK-LABEL: @test6_vec(
; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 -2>
-; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 -2>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ADD]]
;
%ashr = ashr <2 x i32> %V, <i32 1, i32 1>
@@ -280,8 +280,8 @@ define <2 x i64> @test6_vec(<2 x i32> %V) {
define <2 x i64> @test6_vec2(<2 x i32> %V) {
; CHECK-LABEL: @test6_vec2(
; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 1>
-; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[ADDCONV]] to <2 x i64>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 1>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ADD]]
;
%ashr = ashr <2 x i32> %V, <i32 1, i32 1>
@@ -293,8 +293,8 @@ define <2 x i64> @test6_vec2(<2 x i32> %V) {
define i64 @test7(i32 %V) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[V:%.*]], 1
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw i32 [[LSHR]], 2147483647
-; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw i32 [[LSHR]], 2147483647
+; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[ADD]]
;
%lshr = lshr i32 %V, 1
@@ -306,8 +306,8 @@ define i64 @test7(i32 %V) {
define <2 x i64> @test7_splat(<2 x i32> %V) {
; CHECK-LABEL: @test7_splat(
; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 2147483647, i32 2147483647>
-; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[ADDCONV]] to <2 x i64>
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ADD]]
;
%lshr = lshr <2 x i32> %V, <i32 1, i32 1>
@@ -319,8 +319,8 @@ define <2 x i64> @test7_splat(<2 x i32> %V) {
define <2 x i64> @test7_vec(<2 x i32> %V) {
; CHECK-LABEL: @test7_vec(
; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 1, i32 2>
-; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[ADDCONV]] to <2 x i64>
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 1, i32 2>
+; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ADD]]
;
%lshr = lshr <2 x i32> %V, <i32 1, i32 1>
@@ -463,8 +463,8 @@ define i64 @test11(i32 %V) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !1
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !1
-; CHECK-NEXT: [[ADDCONV:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
-; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[ADDCONV]] to i64
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64
; CHECK-NEXT: ret i64 [[ADD]]
;
%call1 = call i32 @callee(), !range !1
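
For reference, the tests in this file are driven by the standard InstCombine harness; the RUN line at the top of narrow-math.ll (outside these hunks) has the usual shape for this era's legacy pass syntax, assumed here:

; RUN: opt < %s -instcombine -S | FileCheck %s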