summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp46
-rw-r--r--llvm/test/Transforms/InstCombine/ctpop.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/intrinsics.ll41
3 files changed, 108 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c0830a5d211..dbed7ad4eae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1409,6 +1409,47 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
}
}
+ // Add range metadata since known bits can't completely reflect what we know.
+ // TODO: Handle splat vectors.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
+ return nullptr;
+}
+
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+ assert(II.getIntrinsicID() == Intrinsic::ctpop &&
+ "Expected ctpop intrinsic");
+ Value *Op0 = II.getArgOperand(0);
+ // FIXME: Try to simplify vectors of integers.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (!IT)
+ return nullptr;
+
+ unsigned BitWidth = IT->getBitWidth();
+ KnownBits Known(BitWidth);
+ IC.computeKnownBits(Op0, Known, 0, &II);
+
+ unsigned MinCount = Known.countMinPopulation();
+ unsigned MaxCount = Known.countMaxPopulation();
+
+ // Add range metadata since known bits can't completely reflect what we know.
+ if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
return nullptr;
}
@@ -1981,6 +2022,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return I;
break;
+ case Intrinsic::ctpop:
+ if (auto *I = foldCtpop(*II, *this))
+ return I;
+ break;
+
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::umul_with_overflow:
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index d49a907ffce..e8e3603e4cb 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -3,6 +3,8 @@
declare i32 @llvm.ctpop.i32(i32)
declare i8 @llvm.ctpop.i8(i8)
+declare i1 @llvm.ctpop.i1(i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
declare void @llvm.assume(i1)
define i1 @test1(i32 %arg) {
@@ -44,7 +46,7 @@ define i1 @test3(i32 %arg) {
; Negative test for when we know nothing
define i1 @test4(i8 %arg) {
; CHECK-LABEL: @test4(
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
@@ -55,16 +57,41 @@ define i1 @test4(i8 %arg) {
; Test when the number of possible known bits isn't one less than a power of 2
; and the compare value is greater but less than the next power of 2.
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @test5(i32 %arg) {
; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 3
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[AND]])
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%and = and i32 %arg, 3
%cnt = call i32 @llvm.ctpop.i32(i32 %and)
%res = icmp eq i32 %cnt, 3
ret i1 %res
}
+
+; Test when the number of possible known bits isn't one less than a power of 2
+; and the compare value is greater but less than the next power of 2.
+; TODO: The icmp is unnecessary given the known bits of the input, but range
+; metadata doesn't support vectors
+define <2 x i1> @test5vec(<2 x i32> %arg) {
+; CHECK-LABEL: @test5vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]])
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %and = and <2 x i32> %arg, <i32 3, i32 3>
+ %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %and)
+ %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+ ret <2 x i1> %res
+}
+
+; Make sure we don't add range metadata to i1 ctpop.
+define i1 @test6(i1 %arg) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]])
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctpop.i1(i1 %arg)
+ ret i1 %cnt
+}
+
+; CHECK: ![[RANGE]] = !{i8 0, i8 9}
diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll
index 1b1ed606868..c294d79f15e 100644
--- a/llvm/test/Transforms/InstCombine/intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/intrinsics.ll
@@ -16,6 +16,8 @@ declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
declare double @llvm.powi.f64(double, i32) nounwind readonly
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i1 @llvm.cttz.i1(i1, i1) nounwind readnone
+declare i1 @llvm.ctlz.i1(i1, i1) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
@@ -293,6 +295,16 @@ define <2 x i32> @cttz_vec(<2 x i32> %a) {
ret <2 x i32> %count
}
+; Make sure we don't add range metadata to i1 cttz.
+define i1 @cttz_i1(i1 %arg) {
+; CHECK-LABEL: @cttz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.cttz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
define i1 @cttz_knownbits(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits(
; CHECK-NEXT: ret i1 false
@@ -316,7 +328,7 @@ define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) {
define i1 @cttz_knownbits2(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true)
+; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[CTTZ_RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
@@ -339,13 +351,9 @@ define <2 x i1> @cttz_knownbits2_vec(<2 x i32> %arg) {
ret <2 x i1> %res
}
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @cttz_knownbits3(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits3(
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%or = or i32 %arg, 4
%cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
@@ -387,6 +395,16 @@ define <2 x i8> @ctlz_vec(<2 x i8> %a) {
ret <2 x i8> %count
}
+; Make sure we don't add range metadata to i1 ctlz.
+define i1 @ctlz_i1(i1 %arg) {
+; CHECK-LABEL: @ctlz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
define i1 @ctlz_knownbits(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits(
; CHECK-NEXT: ret i1 false
@@ -410,7 +428,7 @@ define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) {
define i1 @ctlz_knownbits2(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true)
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[CTLZ_RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
@@ -433,13 +451,9 @@ define <2 x i1> @ctlz_knownbits2_vec(<2 x i8> %arg) {
ret <2 x i1> %res
}
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @ctlz_knownbits3(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits3(
-; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%or = or i8 %arg, 32
%cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
@@ -790,3 +804,6 @@ define void @nearbyint(double *%P) {
store volatile double %C, double* %P
ret void
}
+
+; CHECK: [[CTTZ_RANGE]] = !{i32 0, i32 3}
+; CHECK: [[CTLZ_RANGE]] = !{i8 0, i8 3}
OpenPOWER on IntegriCloud