summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Nikita Popov <nikita.ppv@gmail.com> 2019-01-05 09:48:16 +0000
committer: Nikita Popov <nikita.ppv@gmail.com> 2019-01-05 09:48:16 +0000
commit: 65038515ee4465d20103b2c32b337a76a2c63b53 (patch)
tree: e6e6ce7bb99118a6303a3da242e29da84d5ce63c
parent: 7bd4900ba095b26fedc595275ecf40c8329b7aa8 (diff)
download: bcm5719-llvm-65038515ee4465d20103b2c32b337a76a2c63b53.tar.gz
          bcm5719-llvm-65038515ee4465d20103b2c32b337a76a2c63b53.zip
[InstCombine] Relax cttz/ctlz with select on zero
The cttz/ctlz intrinsics have a parameter specifying whether the result is undefined for zero. `cttz(x, false)` can be relaxed to `cttz(x, true)` if x is known non-zero, and in fact such an optimization is already performed. However, this currently doesn't work if x is non-zero as a result of a select rather than an explicit branch.

This patch adds handling for this case, thus allowing `x != 0 ? cttz(x, false) : y` to simplify to `x != 0 ? cttz(x, true) : y`.

Differential Revision: https://reviews.llvm.org/D55786

llvm-svn: 350463
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp 23
-rw-r--r-- llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll 8
2 files changed, 19 insertions, 12 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b1e0ffa6fa7..ebbe2afe3ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -709,17 +709,18 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
match(Count, m_Trunc(m_Value(V))))
Count = V;
+ // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+ // input to the cttz/ctlz is used as LHS for the compare instruction.
+ if (!match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) &&
+ !match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS))))
+ return nullptr;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(Count);
+
// Check if the value propagated on zero is a constant number equal to the
// sizeof in bits of 'Count'.
unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
- if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
- return nullptr;
-
- // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
- // input to the cttz/ctlz is used as LHS for the compare instruction.
- if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
- match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
- IntrinsicInst *II = cast<IntrinsicInst>(Count);
+ if (match(ValueOnZero, m_SpecificInt(SizeOfInBits))) {
// Explicitly clear the 'undef_on_zero' flag.
IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
NewI->setArgOperand(1, ConstantInt::getFalse(NewI->getContext()));
@@ -727,6 +728,12 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
}
+ // If the ValueOnZero is not the bitwidth, we can at least make use of the
+ // fact that the cttz/ctlz result will not be used if the input is zero, so
+ // it's okay to relax it to undef for that case.
+ if (II->hasOneUse() && !match(II->getArgOperand(1), m_One()))
+ II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
index f7690079b6f..606cdedca7a 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
@@ -345,7 +345,7 @@ define i128 @test8(i128 %x) {
define i32 @test_ctlz_not_bw(i32 %x) {
; CHECK-LABEL: @test_ctlz_not_bw(
-; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), !range !1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
; CHECK-NEXT: ret i32 [[RES]]
@@ -373,7 +373,7 @@ define i32 @test_ctlz_not_bw_multiuse(i32 %x) {
define i32 @test_cttz_not_bw(i32 %x) {
; CHECK-LABEL: @test_cttz_not_bw(
-; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
; CHECK-NEXT: ret i32 [[RES]]
@@ -412,7 +412,7 @@ define <2 x i32> @test_ctlz_bw_vec(<2 x i32> %x) {
define <2 x i32> @test_ctlz_not_bw_vec(<2 x i32> %x) {
; CHECK-LABEL: @test_ctlz_not_bw_vec(
-; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 true)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> [[CT]]
; CHECK-NEXT: ret <2 x i32> [[RES]]
@@ -436,7 +436,7 @@ define <2 x i32> @test_cttz_bw_vec(<2 x i32> %x) {
define <2 x i32> @test_cttz_not_bw_vec(<2 x i32> %x) {
; CHECK-LABEL: @test_cttz_not_bw_vec(
-; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 true)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> [[CT]]
; CHECK-NEXT: ret <2 x i32> [[RES]]
OpenPOWER on IntegriCloud