summary refs log tree commit diff stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp 59
-rw-r--r-- llvm/test/Transforms/InstCombine/overflow_to_sat.ll 63
2 files changed, 67 insertions, 55 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8446183cfa1..104835bdfd8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1734,6 +1734,8 @@ static Instruction *foldAddSubSelect(SelectInst &SI,
/// Turn X + Y overflows ? -1 : X + Y -> uadd_sat X, Y
/// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y
+/// Along with a number of patterns similar to:
+/// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
static Instruction *
foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
Value *CondVal = SI.getCondition();
@@ -1745,6 +1747,50 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
!match(FalseVal, m_ExtractValue<0>(m_Specific(II))))
return nullptr;
+ Value *X = II->getLHS();
+ Value *Y = II->getRHS();
+
+ auto IsSignedSaturateLimit = [&](Value *Limit) { // True if Limit is a sign-test select (on X or Y) between the signed min and max constants.
+ Type *Ty = Limit->getType(); // Scalar bit width of this type sizes the min/max constants below.
+
+ ICmpInst::Predicate Pred;
+ Value *TrueVal, *FalseVal, *Op; // Arms and compared value of the inner select; these shadow the enclosing function's TrueVal/FalseVal.
+ const APInt *C;
+ if (!match(Limit, m_Select(m_ICmp(Pred, m_Value(Op), m_APInt(C)),
+ m_Value(TrueVal), m_Value(FalseVal))))
+ return false; // Limit is not "select (icmp Pred Op, C), TrueVal, FalseVal".
+
+ auto IsZeroOrOne = [](const APInt &C) { // Parameter C shadows the outer pointer C inside this helper.
+ return C.isNullValue() || C.isOneValue();
+ };
+ auto IsMinMax = [&](Value *Min, Value *Max) { // True if Min is the signed-min constant and Max the signed-max constant.
+ APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
+ APInt MaxVal = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
+ return match(Min, m_SpecificInt(MinVal)) &&
+ match(Max, m_SpecificInt(MaxVal));
+ };
+
+ if (Op != X && Op != Y) // The compared value must be one of the two operands of the overflow intrinsic.
+ return false;
+
+ // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&
+ IsMinMax(TrueVal, FalseVal))
+ return true;
+ // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) && // *C + 1 is 0 or 1 exactly when *C is -1 or 0.
+ IsMinMax(FalseVal, TrueVal))
+ return true;
+
+ return false; // Any other predicate/constant/arm combination is not recognised.
+ };
+
Intrinsic::ID NewIntrinsicID;
if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow &&
match(TrueVal, m_AllOnes()))
@@ -1754,12 +1800,23 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
match(TrueVal, m_Zero()))
// X - Y overflows ? 0 : X - Y -> usub_sat X, Y
NewIntrinsicID = Intrinsic::usub_sat;
+ else if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow &&
+ IsSignedSaturateLimit(TrueVal))
+ // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ NewIntrinsicID = Intrinsic::sadd_sat;
else
return nullptr;
Function *F =
Intrinsic::getDeclaration(SI.getModule(), NewIntrinsicID, SI.getType());
- return CallInst::Create(F, {II->getArgOperand(0), II->getArgOperand(1)});
+ return CallInst::Create(F, {X, Y});
}
Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
diff --git a/llvm/test/Transforms/InstCombine/overflow_to_sat.ll b/llvm/test/Transforms/InstCombine/overflow_to_sat.ll
index 12ee430ecfc..474c8fe6b3c 100644
--- a/llvm/test/Transforms/InstCombine/overflow_to_sat.ll
+++ b/llvm/test/Transforms/InstCombine/overflow_to_sat.ll
@@ -47,12 +47,7 @@ define i8 @sadd_x_lt_min(i8 %x, i8 %y) {
define i8 @sadd_x_lt_max(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_x_lt_max(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 0
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -85,12 +80,7 @@ define i8 @sadd_x_le_min(i8 %x, i8 %y) {
define i8 @sadd_x_le_max(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_x_le_max(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 1
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -104,12 +94,7 @@ define i8 @sadd_x_le_max(i8 %x, i8 %y) {
define i8 @sadd_x_gt_min(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_x_gt_min(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], 0
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -142,12 +127,7 @@ define i8 @sadd_x_gt_max(i8 %x, i8 %y) {
define i8 @sadd_x_ge_min(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_x_ge_min(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], -1
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -200,12 +180,7 @@ define i8 @sadd_y_lt_min(i8 %x, i8 %y) {
define i8 @sadd_y_lt_max(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_y_lt_max(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 0
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -238,12 +213,7 @@ define i8 @sadd_y_le_min(i8 %x, i8 %y) {
define i8 @sadd_y_le_max(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_y_le_max(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 1
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -257,12 +227,7 @@ define i8 @sadd_y_le_max(i8 %x, i8 %y) {
define i8 @sadd_y_gt_min(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_y_gt_min(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], 0
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -295,12 +260,7 @@ define i8 @sadd_y_gt_max(i8 %x, i8 %y) {
define i8 @sadd_y_ge_min(i8 %x, i8 %y) {
; CHECK-LABEL: @sadd_y_ge_min(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], -1
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
@@ -718,12 +678,7 @@ define i8 @ssub_y_ge_max(i8 %x, i8 %y) {
define i32 @sadd_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @sadd_i32(
-; CHECK-NEXT: [[AO:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
-; CHECK-NEXT: [[O:%.*]] = extractvalue { i32, i1 } [[AO]], 1
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i32, i1 } [[AO]], 0
-; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X]], 0
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 -2147483648, i32 2147483647
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i32 [[S]], i32 [[A]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
; CHECK-NEXT: ret i32 [[R]]
;
%ao = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
OpenPOWER on IntegriCloud