diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-03-26 17:50:08 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-03-26 17:50:08 +0000 |
commit | 81e8d76f5b63ad18fa538960ea57723b2329e8cd (patch) | |
tree | 28efb12d369398896df803655365bf388bca4952 | |
parent | 1aaa481fc1f8ad45d36e7e9e9340d22790a7c739 (diff) | |
download | bcm5719-llvm-81e8d76f5b63ad18fa538960ea57723b2329e8cd.tar.gz bcm5719-llvm-81e8d76f5b63ad18fa538960ea57723b2329e8cd.zip |
[InstCombine] form uaddsat from add+umin (PR14613)
This is the last step towards solving the examples shown in:
https://bugs.llvm.org/show_bug.cgi?id=14613
With this change, x86 should end up with psubus instructions
when those are available.
All known codegen issues with expanding the saturating intrinsics
were resolved with:
D59006 / rL356855
We also have some early evidence in D58872 that using the intrinsics
will lead to better performance. If some target regresses from this change,
custom lowering of the intrinsics (as was done for x86 in the change cited above) may be needed.
llvm-svn: 357012
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 25 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/minmax-fold.ll | 4 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/saturating-add-sub.ll | 87 |
3 files changed, 94 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 4b0199d8393..6d743a26098 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1064,6 +1064,28 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I, return BinaryOperator::CreateNot(NotMask, I.getName()); } +static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { + assert(I.getOpcode() == Instruction::Add && "Expecting add instruction"); + Type *Ty = I.getType(); + auto getUAddSat = [&]() { + return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty); + }; + + // add (umin X, ~Y), Y --> uaddsat X, Y + Value *X, *Y; + if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))), + m_Deferred(Y)))) + return CallInst::Create(getUAddSat(), { X, Y }); + + // add (umin X, ~C), C --> uaddsat X, C + const APInt *C, *NotC; + if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) && + *C == ~*NotC) + return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) }); + + return nullptr; +} + Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), @@ -1266,6 +1288,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Instruction *V = canonicalizeLowbitMask(I, Builder)) return V; + if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I)) + return SatAdd; + return Changed ? 
&I : nullptr; } diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll index d312155c523..f68b62ad7d0 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -1134,9 +1134,7 @@ define <2 x i33> @add_umax_vec(<2 x i33> %x) { define i8 @PR14613_umin(i8 %x) { ; CHECK-LABEL: @PR14613_umin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -16 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16 -; CHECK-NEXT: [[U7:%.*]] = add i8 [[TMP2]], 15 +; CHECK-NEXT: [[U7:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15) ; CHECK-NEXT: ret i8 [[U7]] ; %u4 = zext i8 %x to i32 diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index c817b18abfc..4d0e722d5c8 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1254,10 +1254,7 @@ declare <2 x i8> @get_v2i8() define i32 @unsigned_sat_variable_using_min_add(i32 %x) { ; CHECK-LABEL: @unsigned_sat_variable_using_min_add( ; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32() -; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]] -; CHECK-NEXT: [[R:%.*]] = add i32 [[S]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]]) ; CHECK-NEXT: ret i32 [[R]] ; %y = call i32 @get_i32() ; thwart complexity-based canonicalization @@ -1271,10 +1268,7 @@ define i32 @unsigned_sat_variable_using_min_add(i32 %x) { define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) { ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add( ; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32() -; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = select 
i1 [[C]], i32 [[X]], i32 [[NOTY]] -; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], [[S]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]]) ; CHECK-NEXT: ret i32 [[R]] ; %y = call i32 @get_i32() ; thwart complexity-based canonicalization @@ -1288,10 +1282,7 @@ define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) { define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) { ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_select( ; CHECK-NEXT: [[Y:%.*]] = call <2 x i8> @get_v2i8() -; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1> -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]] -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[S]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]]) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization @@ -1305,10 +1296,7 @@ define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) { define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x) { ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add_select( ; CHECK-NEXT: [[Y:%.*]] = call <2 x i8> @get_v2i8() -; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1> -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]] -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y]], [[S]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]]) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization @@ -1319,13 +1307,49 @@ define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x) ret <2 x i8> %r } +; Negative test + +define i32 @unsigned_sat_variable_using_wrong_min(i32 %x) { 
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_min( +; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32() +; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[NOTY]], [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]] +; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], [[S]] +; CHECK-NEXT: ret i32 [[R]] +; + %y = call i32 @get_i32() ; thwart complexity-based canonicalization + %noty = xor i32 %y, -1 + %c = icmp slt i32 %x, %noty + %s = select i1 %c, i32 %x, i32 %noty + %r = add i32 %y, %s + ret i32 %r +} + +; Negative test + +define i32 @unsigned_sat_variable_using_wrong_value(i32 %x, i32 %z) { +; CHECK-LABEL: @unsigned_sat_variable_using_wrong_value( +; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32() +; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]] +; CHECK-NEXT: [[R:%.*]] = add i32 [[S]], [[Z:%.*]] +; CHECK-NEXT: ret i32 [[R]] +; + %y = call i32 @get_i32() ; thwart complexity-based canonicalization + %noty = xor i32 %y, -1 + %c = icmp ult i32 %x, %noty + %s = select i1 %c, i32 %x, i32 %noty + %r = add i32 %z, %s + ret i32 %r +} + ; If we have a constant operand, there's no commutativity variation. 
define i32 @unsigned_sat_constant_using_min(i32 %x) { ; CHECK-LABEL: @unsigned_sat_constant_using_min( -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 42 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42 -; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[S]], -43 +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 -43) ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp ult i32 %x, 42 @@ -1334,3 +1358,28 @@ define i32 @unsigned_sat_constant_using_min(i32 %x) { ret i32 %r } +define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) { +; CHECK-LABEL: @unsigned_sat_constant_using_min_splat( +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -15, i32 -15>) +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %c = icmp ult <2 x i32> %x, <i32 14, i32 14> + %s = select <2 x i1> %c, <2 x i32> %x, <2 x i32> <i32 14, i32 14> + %r = add <2 x i32> %s, <i32 -15, i32 -15> + ret <2 x i32> %r +} + +; Negative test + +define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) { +; CHECK-LABEL: @unsigned_sat_constant_using_min_wrong_constant( +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 42 +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42 +; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[S]], -42 +; CHECK-NEXT: ret i32 [[R]] +; + %c = icmp ult i32 %x, 42 + %s = select i1 %c, i32 %x, i32 42 + %r = add i32 %s, -42 + ret i32 %r +} |