[InstCombine] form uaddsat from add+umin (PR14613)

This is the last step towards solving the examples shown in:
https://bugs.llvm.org/show_bug.cgi?id=14613

With this change, x86 should end up with psubus instructions when those are available.

All known codegen issues with expanding the saturating intrinsics were resolved with:
D59006 / rL356855

We also have some early evidence in D58872 that using the intrinsics will lead to better perf. If some target regresses from this, custom lowering of the intrinsics (as in the above for x86) may be needed.

llvm-svn: 357012
author: Sanjay Patel <spatel@rotateright.com> 2019-03-26 17:50:08 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-03-26 17:50:08 +0000
commit: 81e8d76f5b63ad18fa538960ea57723b2329e8cd (patch)
tree: 28efb12d369398896df803655365bf388bca4952
parent: 1aaa481fc1f8ad45d36e7e9e9340d22790a7c739 (diff)
download: bcm5719-llvm-81e8d76f5b63ad18fa538960ea57723b2329e8cd.tar.gz
bcm5719-llvm-81e8d76f5b63ad18fa538960ea57723b2329e8cd.zip
3 files changed, 94 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 4b0199d8393..6d743a26098 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1064,6 +1064,28 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
   return BinaryOperator::CreateNot(NotMask, I.getName());
 }
 
+static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
+  assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
+  Type *Ty = I.getType();
+  auto getUAddSat = [&]() { // invoked only once a pattern has matched
+    return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
+  };
+
+  // add (umin X, ~Y), Y --> uaddsat X, Y (add/umin operands may be commuted)
+  Value *X, *Y;
+  if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
+                        m_Deferred(Y)))) // must be the Y bound under the not
+    return CallInst::Create(getUAddSat(), { X, Y });
+
+  // add (umin X, ~C), C --> uaddsat X, C (constant form: NotC must equal ~C)
+  const APInt *C, *NotC;
+  if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
+      *C == ~*NotC) // the two constants must be bitwise complements
+    return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });
+
+  return nullptr; // neither pattern matched; I is left untouched
+}
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1266,6 +1288,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Instruction *V = canonicalizeLowbitMask(I, Builder))
     return V;
 
+  if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
+    return SatAdd;
+
   return Changed ? &I : nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index d312155c523..f68b62ad7d0 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1134,9 +1134,7 @@ define <2 x i33> @add_umax_vec(<2 x i33> %x) {
 
 define i8 @PR14613_umin(i8 %x) {
 ; CHECK-LABEL: @PR14613_umin(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -16
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16
-; CHECK-NEXT:    [[U7:%.*]] = add i8 [[TMP2]], 15
+; CHECK-NEXT:    [[U7:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15)
 ; CHECK-NEXT:    ret i8 [[U7]]
 ;
   %u4 = zext i8 %x to i32
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index c817b18abfc..4d0e722d5c8 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1254,10 +1254,7 @@ declare <2 x i8> @get_v2i8()
 define i32 @unsigned_sat_variable_using_min_add(i32 %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_add(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[S]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %y = call i32 @get_i32() ; thwart complexity-based canonicalization
@@ -1271,10 +1268,7 @@ define i32 @unsigned_sat_variable_using_min_add(i32 %x) {
 define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[Y]], [[S]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %y = call i32 @get_i32() ; thwart complexity-based canonicalization
@@ -1288,10 +1282,7 @@ define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) {
 define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_select(
 ; CHECK-NEXT:    [[Y:%.*]] = call <2 x i8> @get_v2i8()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]]
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[S]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
@@ -1305,10 +1296,7 @@ define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) {
 define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add_select(
 ; CHECK-NEXT:    [[Y:%.*]] = call <2 x i8> @get_v2i8()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]]
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[Y]], [[S]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
@@ -1319,13 +1307,49 @@ define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x)
   ret <2 x i8> %r
 }
 
+; Negative test - the min is signed (icmp slt), not an unsigned min
+
+define i32 @unsigned_sat_variable_using_wrong_min(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_min(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[Y]], [[S]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+  %noty = xor i32 %y, -1
+  %c = icmp slt i32 %x, %noty
+  %s = select i1 %c, i32 %x, i32 %noty
+  %r = add i32 %y, %s
+  ret i32 %r
+}
+
+; Negative test - the value added (%z) is not the value that was inverted (%y)
+
+define i32 @unsigned_sat_variable_using_wrong_value(i32 %x, i32 %z) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_value(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[S]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+  %noty = xor i32 %y, -1
+  %c = icmp ult i32 %x, %noty
+  %s = select i1 %c, i32 %x, i32 %noty
+  %r = add i32 %z, %s
+  ret i32 %r
+}
+
 ; If we have a constant operand, there's no commutativity variation.
 
 define i32 @unsigned_sat_constant_using_min(i32 %x) {
 ; CHECK-LABEL: @unsigned_sat_constant_using_min(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[X:%.*]], 42
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42
-; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[S]], -43
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 -43)
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %c = icmp ult i32 %x, 42
@@ -1334,3 +1358,28 @@ define i32 @unsigned_sat_constant_using_min(i32 %x) {
   ret i32 %r
 }
 
+define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_splat(
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -15, i32 -15>)
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %c = icmp ult <2 x i32> %x, <i32 14, i32 14>
+  %s = select <2 x i1> %c, <2 x i32> %x, <2 x i32> <i32 14, i32 14>
+  %r = add <2 x i32> %s, <i32 -15, i32 -15>
+  ret <2 x i32> %r
+}
+
+; Negative test - the add constant (-42) is not the bitwise-not of 42 (-43)
+
+define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_wrong_constant(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[X:%.*]], 42
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42
+; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[S]], -42
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %c = icmp ult i32 %x, 42
+  %s = select i1 %c, i32 %x, i32 42
+  %r = add i32 %s, -42
+  ret i32 %r
+}
author	Sanjay Patel <spatel@rotateright.com>	2019-03-26 17:50:08 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-03-26 17:50:08 +0000
commit	81e8d76f5b63ad18fa538960ea57723b2329e8cd (patch)
tree	28efb12d369398896df803655365bf388bca4952
parent	1aaa481fc1f8ad45d36e7e9e9340d22790a7c739 (diff)
download	bcm5719-llvm-81e8d76f5b63ad18fa538960ea57723b2329e8cd.tar.gz bcm5719-llvm-81e8d76f5b63ad18fa538960ea57723b2329e8cd.zip