3 files changed, 108 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c0830a5d211..dbed7ad4eae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1409,6 +1409,47 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
     }
   }
 
+  // Add range metadata since known bits can't completely reflect what we know.
+  // TODO: Handle splat vectors.
+  auto *IT = dyn_cast<IntegerType>(Op0->getType());
+  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+    Metadata *LowAndHigh[] = {
+        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
+        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
+    II.setMetadata(LLVMContext::MD_range,
+                   MDNode::get(II.getContext(), LowAndHigh));
+    return &II;
+  }
+
+  return nullptr;
+}
+
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
+         "Expected ctpop intrinsic");
+  Value *Op0 = II.getArgOperand(0);
+  // Only scalar integers are handled. FIXME: Try to simplify vectors of integers.
+  auto *IT = dyn_cast<IntegerType>(Op0->getType());
+  if (!IT)
+    return nullptr;
+
+  unsigned BitWidth = IT->getBitWidth();
+  KnownBits Known(BitWidth);
+  IC.computeKnownBits(Op0, Known, 0, &II);
+
+  unsigned MinCount = Known.countMinPopulation();
+  unsigned MaxCount = Known.countMaxPopulation();
+
+  // Known bits can't express these bounds: add !range [MinCount, MaxCount + 1).
+  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+    Metadata *LowAndHigh[] = {
+        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
+        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
+    II.setMetadata(LLVMContext::MD_range,
+                   MDNode::get(II.getContext(), LowAndHigh));
+    return &II;
+  }
+
   return nullptr;
 }
 
@@ -1981,6 +2022,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return I;
     break;
 
+  case Intrinsic::ctpop:
+    if (auto *I = foldCtpop(*II, *this))
+      return I;
+    break;
+
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::umul_with_overflow:
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index d49a907ffce..e8e3603e4cb 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -3,6 +3,8 @@
 
 declare i32 @llvm.ctpop.i32(i32)
 declare i8 @llvm.ctpop.i8(i8)
+declare i1 @llvm.ctpop.i1(i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
 declare void @llvm.assume(i1)
 
 define i1 @test1(i32 %arg) {
@@ -44,7 +46,7 @@ define i1 @test3(i32 %arg) {
 ; Negative test for when we know nothing
 define i1 @test4(i8 %arg) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[RANGE:[0-9]+]]
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 2
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -55,16 +57,41 @@ define i1 @test4(i8 %arg) {
 
 ; Test when the number of possible known bits isn't one less than a power of 2
 ; and the compare value is greater but less than the next power of 2.
-; TODO: The icmp is unnecessary given the known bits of the input.
 define i1 @test5(i32 %arg) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG:%.*]], 3
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[AND]])
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    ret i1 false
 ;
   %and = and i32 %arg, 3
   %cnt = call i32 @llvm.ctpop.i32(i32 %and)
   %res = icmp eq i32 %cnt, 3
   ret i1 %res
 }
+
+; Vector variant of test5: at most 2 input bits can be set, which isn't one
+; less than a power of 2, and the compare value 3 is above that but below 4.
+; TODO: The icmp is unnecessary given the known bits of the input, but range
+; metadata doesn't support vectors.
+define <2 x i1> @test5vec(<2 x i32> %arg) {
+; CHECK-LABEL: @test5vec(
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]])
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
+  %and = and <2 x i32> %arg, <i32 3, i32 3>
+  %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %and)
+  %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+  ret <2 x i1> %res
+}
+
+; Make sure we don't add range metadata to i1 ctpop; the fold skips 1-bit types.
+define i1 @test6(i1 %arg) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]])
+; CHECK-NEXT:    ret i1 [[CNT]]
+;
+  %cnt = call i1 @llvm.ctpop.i1(i1 %arg)
+  ret i1 %cnt
+}
+
+; CHECK: ![[RANGE]] = !{i8 0, i8 9}
diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll
index 1b1ed606868..c294d79f15e 100644
--- a/llvm/test/Transforms/InstCombine/intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/intrinsics.ll
@@ -16,6 +16,8 @@ declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 declare double @llvm.powi.f64(double, i32) nounwind readonly
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i1 @llvm.cttz.i1(i1, i1) nounwind readnone
+declare i1 @llvm.ctlz.i1(i1, i1) nounwind readnone
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
@@ -293,6 +295,16 @@ define <2 x i32> @cttz_vec(<2 x i32> %a) {
   ret <2 x i32> %count
 }
 
+; Make sure we don't add range metadata to i1 cttz; the fold skips 1-bit types.
+define i1 @cttz_i1(i1 %arg) {
+; CHECK-LABEL: @cttz_i1(
+; CHECK-NEXT:    [[CNT:%.*]] = call i1 @llvm.cttz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT:    ret i1 [[CNT]]
+;
+  %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 false) nounwind readnone
+  ret i1 %cnt
+}
+
 define i1 @cttz_knownbits(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits(
 ; CHECK-NEXT:    ret i1 false
@@ -316,7 +328,7 @@ define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) {
 define i1 @cttz_knownbits2(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits2(
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true)
+; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[CTTZ_RANGE:[0-9]+]]
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 2
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -339,13 +351,9 @@ define <2 x i1> @cttz_knownbits2_vec(<2 x i32> %arg) {
   ret <2 x i1> %res
 }
 
-; TODO: The icmp is unnecessary given the known bits of the input.
 define i1 @cttz_knownbits3(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits3(
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    ret i1 false
 ;
   %or = or i32 %arg, 4
   %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
@@ -387,6 +395,16 @@ define <2 x i8> @ctlz_vec(<2 x i8> %a) {
   ret <2 x i8> %count
 }
 
+; Make sure we don't add range metadata to i1 ctlz; the fold skips 1-bit types.
+define i1 @ctlz_i1(i1 %arg) {
+; CHECK-LABEL: @ctlz_i1(
+; CHECK-NEXT:    [[CNT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT:    ret i1 [[CNT]]
+;
+  %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 false) nounwind readnone
+  ret i1 %cnt
+}
+
 define i1 @ctlz_knownbits(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits(
 ; CHECK-NEXT:    ret i1 false
@@ -410,7 +428,7 @@ define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) {
 define i1 @ctlz_knownbits2(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits2(
 ; CHECK-NEXT:    [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true)
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[CTLZ_RANGE:[0-9]+]]
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 2
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -433,13 +451,9 @@ define <2 x i1> @ctlz_knownbits2_vec(<2 x i8> %arg) {
   ret <2 x i1> %res
 }
 
-; TODO: The icmp is unnecessary given the known bits of the input.
 define i1 @ctlz_knownbits3(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits3(
-; CHECK-NEXT:    [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 3
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    ret i1 false
 ;
   %or = or i8 %arg, 32
   %cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
@@ -790,3 +804,6 @@ define void @nearbyint(double *%P) {
   store volatile double %C, double* %P
   ret void
 }
+
+; CHECK: [[CTTZ_RANGE]] = !{i32 0, i32 3}
+; CHECK: [[CTLZ_RANGE]] = !{i8 0, i8 3}