diff options
| -rw-r--r-- | llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 136 | ||||
| -rw-r--r-- | llvm/test/Transforms/IRCE/eq_ne.ll | 8 | ||||
| -rw-r--r-- | llvm/test/Transforms/IRCE/variable-loop-bounds.ll | 174 | 
3 files changed, 254 insertions, 64 deletions
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 9d965d8e131..940db29dce4 100644 --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -702,27 +702,59 @@ static bool CanBeMax(ScalarEvolution &SE, const SCEV *S, bool Signed) {           SE.getUnsignedRange(S).contains(Max);  } -static bool SumCanReachMax(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, -                           bool Signed) { -  // S1 < INT_MAX - S2 ===> S1 + S2 < INT_MAX. -  assert(SE.isKnownNonNegative(S2) && -         "We expected the 2nd arg to be non-negative!"); -  const SCEV *Max = SE.getConstant( -      Signed ? APInt::getSignedMaxValue( -                   cast<IntegerType>(S1->getType())->getBitWidth()) -             : APInt::getMaxValue( -                   cast<IntegerType>(S1->getType())->getBitWidth())); -  const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2); -  return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, -                              S1, CapForS1); +/// Given a loop with an increasing induction variable, is it possible to +/// safely calculate the bounds of a new loop using the given Predicate. +static bool isSafeIncreasingBound(const SCEV *Start, +                                  const SCEV *BoundSCEV, const SCEV *Step, +                                  ICmpInst::Predicate Pred, +                                  unsigned LatchBrExitIdx, +                                  Loop *L, ScalarEvolution &SE) { +  if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT && +      Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT) +    return false; + +  if (!SE.isAvailableAtLoopEntry(BoundSCEV, L)) +    return false; + +  DEBUG(dbgs() << "irce: isSafeIncreasingBound with:\n"); +  DEBUG(dbgs() << "irce: Start: " << *Start); +  DEBUG(dbgs() << "irce: Step: " << *Step); +  DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV); +  DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred) << "\n"); +  DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n"); + +  bool IsSigned = ICmpInst::isSigned(Pred); +  // The predicate that we need to check that the induction variable lies +  // within bounds. +  ICmpInst::Predicate BoundPred = +      IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + +  if (LatchBrExitIdx == 1) +    return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); + +  assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1"); + +  const SCEV *StepMinusOne = +    SE.getMinusSCEV(Step, SE.getOne(Step->getType())); +  unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); +  APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :  +    APInt::getMaxValue(BitWidth); +  const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); + +  return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start, +                                      SE.getAddExpr(BoundSCEV, Step)) && +          SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));  } -static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) { -  APInt Min = Signed ? -      APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth()) : -      APInt::getMinValue(cast<IntegerType>(S->getType())->getBitWidth()); -  return SE.getSignedRange(S).contains(Min) && -         SE.getUnsignedRange(S).contains(Min); +static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L, +                              ScalarEvolution &SE, bool Signed) { +  unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); +  APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :  +    APInt::getMinValue(BitWidth); +  auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; +  return SE.isAvailableAtLoopEntry(BoundSCEV, L) && +         SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV, +                                     SE.getConstant(Min));  }  static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, @@ -904,17 +936,24 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,            Pred = ICmpInst::ICMP_ULT;          else            Pred = ICmpInst::ICMP_SLT; -      else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && -               !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) { +      else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {          // while (true) {               while (true) {          //   if (++i == len)     --->     if (++i > len - 1)          //     break;                       break;          //   ...                          ...          // }                            } -        // TODO: Insert ICMP_UGT if both are non-negative? -        Pred = ICmpInst::ICMP_SGT; -        RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); -        DecreasedRightValueByOne = true; +        if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && +            CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) { +          Pred = ICmpInst::ICMP_UGT; +          RightSCEV = SE.getMinusSCEV(RightSCEV, +                                      SE.getOne(RightSCEV->getType())); +          DecreasedRightValueByOne = true; +        } else if (CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) { +          Pred = ICmpInst::ICMP_SGT; +          RightSCEV = SE.getMinusSCEV(RightSCEV, +                                      SE.getOne(RightSCEV->getType())); +          DecreasedRightValueByOne = true; +        }        }      } @@ -928,36 +967,18 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,        return None;      } -    IsSignedPredicate = -        Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; - +    IsSignedPredicate = ICmpInst::isSigned(Pred);      if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {        FailureReason = "unsigned latch conditions are explicitly prohibited";        return None;      } -    // The predicate that we need to check that the induction variable lies -    // within bounds. -    ICmpInst::Predicate BoundPred = -        IsSignedPredicate ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; - +    if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred, +                               LatchBrExitIdx, &L, SE)) { +      FailureReason = "Unsafe loop bounds"; +      return None; +    }      if (LatchBrExitIdx == 0) { -      const SCEV *StepMinusOne = SE.getMinusSCEV(Step, -                                                 SE.getOne(Step->getType())); -      if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) { -        // TODO: this restriction is easily removable -- we just have to -        // remember that the icmp was an slt and not an sle. -        FailureReason = "limit may overflow when coercing le to lt"; -        return None; -      } - -      if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || -          !SE.isLoopEntryGuardedByCond(&L, BoundPred, IndVarStart, -                                       SE.getAddExpr(RightSCEV, Step))) { -        FailureReason = "Induction variable start not bounded by upper limit"; -        return None; -      } -        // We need to increase the right value unless we have already decreased        // it virtually when we replaced EQ with SGT.        if (!DecreasedRightValueByOne) { @@ -965,11 +986,6 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,          RightValue = B.CreateAdd(RightValue, One);        }      } else { -      if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || -          !SE.isLoopEntryGuardedByCond(&L, BoundPred, IndVarStart, RightSCEV)) { -        FailureReason = "Induction variable start not bounded by upper limit"; -        return None; -      }        assert(!DecreasedRightValueByOne &&               "Right value can be decreased only for LatchBrExitIdx == 0!");      } @@ -1479,13 +1495,15 @@ bool LoopConstrainer::run() {      if (Increasing)        ExitPreLoopAtSCEV = *SR.LowLimit;      else { -      if (CanBeMin(SE, *SR.HighLimit, IsSignedPredicate)) { +      if (CannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE, +                            IsSignedPredicate)) +        ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); +      else {          DEBUG(dbgs() << "irce: could not prove no-overflow when computing "                       << "preloop exit limit.  HighLimit = " << *(*SR.HighLimit)                       << "\n");          return false;        } -      ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);      }      if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) { @@ -1505,13 +1523,15 @@ bool LoopConstrainer::run() {      if (Increasing)        ExitMainLoopAtSCEV = *SR.HighLimit;      else { -      if (CanBeMin(SE, *SR.LowLimit, IsSignedPredicate)) { +      if (CannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE, +                            IsSignedPredicate)) +        ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); +      else {          DEBUG(dbgs() << "irce: could not prove no-overflow when computing "                       << "mainloop exit limit.  LowLimit = " << *(*SR.LowLimit)                       << "\n");          return false;        } -      ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);      }      if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) { diff --git a/llvm/test/Transforms/IRCE/eq_ne.ll b/llvm/test/Transforms/IRCE/eq_ne.ll index 4961a2bbff2..290c1cb823e 100644 --- a/llvm/test/Transforms/IRCE/eq_ne.ll +++ b/llvm/test/Transforms/IRCE/eq_ne.ll @@ -5,7 +5,7 @@  ; CHECK: irce: in function test_01u: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>  ; CHECK-NOT: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>  ; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting> -; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting> +; CHECK: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>  ; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>  ; CHECK-NOT: irce: in function test_06: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>  ; CHECK: irce: in function test_07: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting> @@ -112,7 +112,7 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {  ; CHECK: test_03(  ; CHECK:        main.exit.selector:  ; CHECK-NEXT:     [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] -; CHECK-NEXT:     [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT:     [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 100  ; CHECK-NEXT:     br i1 [[COND]]  entry: @@ -138,12 +138,8 @@ exit:    ret void  } -; Show that if n is not known to be greater than the starting value, IRCE -; doesn't apply.  define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { -; CHECK: test_04( -  entry:    %len = load i32, i32* %a_len_ptr, !range !0    br label %loop diff --git a/llvm/test/Transforms/IRCE/variable-loop-bounds.ll b/llvm/test/Transforms/IRCE/variable-loop-bounds.ll new file mode 100644 index 00000000000..93b90039016 --- /dev/null +++ b/llvm/test/Transforms/IRCE/variable-loop-bounds.ll @@ -0,0 +1,174 @@ +; RUN: opt -irce -S -verify-loop-info -irce-print-changed-loops -irce-skip-profitability-checks < %s 2>&1 | FileCheck %s + +; CHECK: irce: in function test_inc_eq: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting> +; CHECK: irce: in function test_inc_ne: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting> +; CHECK: irce: in function test_inc_slt: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting> +; CHECK: irce: in function test_inc_ult: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting> + +; CHECK-LABEL: test_inc_eq( +; CHECK: main.exit.selector: +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], %N +; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit +define void @test_inc_eq(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: +  %cmp16 = icmp sgt i32 %N, 0 +  br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: +  ret void + +for.body: +  %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] +  %cmp1 = icmp ult i32 %i.017, 512 +  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 +  %0 = load i32, i32* %arrayidx, align 4 +  %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 +  %1 = load i32, i32* %arrayidx2, align 4 +  br i1 %cmp1, label %if.then, label %if.else + +if.then: +  %sub = sub i32 %0, %1 +  %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  %2 = load i32, i32* %arrayidx3, align 4 +  %add = add nsw i32 %sub, %2 +  store i32 %add, i32* %arrayidx3, align 4 +  br label %for.inc + +if.else: +  %add6 = add nsw i32 %1, %0 +  %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  store i32 %add6, i32* %arrayidx7, align 4 +  br label %for.inc + +for.inc: +  %inc = add nuw nsw i32 %i.017, 1 +  %exitcond = icmp eq i32 %inc, %N +  br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: test_inc_ne +; CHECK: main.exit.selector: +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], %N +; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit +define void @test_inc_ne(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: +  %cmp16 = icmp sgt i32 %N, 0 +  br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: +  ret void + +for.body: +  %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] +  %cmp1 = icmp ult i32 %i.017, 512 +  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 +  %0 = load i32, i32* %arrayidx, align 4 +  %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 +  %1 = load i32, i32* %arrayidx2, align 4 +  br i1 %cmp1, label %if.then, label %if.else + +if.then: +  %sub = sub i32 %0, %1 +  %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  %2 = load i32, i32* %arrayidx3, align 4 +  %add = add nsw i32 %sub, %2 +  store i32 %add, i32* %arrayidx3, align 4 +  br label %for.inc + +if.else: +  %add6 = add nsw i32 %1, %0 +  %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  store i32 %add6, i32* %arrayidx7, align 4 +  br label %for.inc + +for.inc: +  %inc = add nuw nsw i32 %i.017, 1 +  %exitcond = icmp ne i32 %inc, %N +  br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: test_inc_slt( +; CHECK: main.exit.selector: +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], %N +; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit +define void @test_inc_slt(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: +  %cmp16 = icmp sgt i32 %N, 0 +  br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: +  ret void + +for.body: +  %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] +  %cmp1 = icmp ult i32 %i.017, 512 +  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 +  %0 = load i32, i32* %arrayidx, align 4 +  %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 +  %1 = load i32, i32* %arrayidx2, align 4 +  br i1 %cmp1, label %if.then, label %if.else + +if.then: +  %sub = sub i32 %0, %1 +  %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  %2 = load i32, i32* %arrayidx3, align 4 +  %add = add nsw i32 %sub, %2 +  store i32 %add, i32* %arrayidx3, align 4 +  br label %for.inc + +if.else: +  %add6 = add nsw i32 %1, %0 +  %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  store i32 %add6, i32* %arrayidx7, align 4 +  br label %for.inc + +for.inc: +  %inc = add nuw nsw i32 %i.017, 1 +  %exitcond = icmp slt i32 %inc, %N +  br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: test_inc_ult +; CHECK: main.exit.selector: +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], %N +; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit +define void @test_inc_ult(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: +  %cmp16 = icmp ugt i32 %N, 0 +  br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: +  ret void + +for.body: +  %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] +  %cmp1 = icmp ult i32 %i.017, 512 +  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 +  %0 = load i32, i32* %arrayidx, align 4 +  %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 +  %1 = load i32, i32* %arrayidx2, align 4 +  br i1 %cmp1, label %if.then, label %if.else + +if.then: +  %sub = sub i32 %0, %1 +  %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  %2 = load i32, i32* %arrayidx3, align 4 +  %add = add nsw i32 %sub, %2 +  store i32 %add, i32* %arrayidx3, align 4 +  br label %for.inc + +if.else: +  %add6 = add nsw i32 %1, %0 +  %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 +  store i32 %add6, i32* %arrayidx7, align 4 +  br label %for.inc + +for.inc: +  %inc = add nuw nsw i32 %i.017, 1 +  %exitcond = icmp ult i32 %inc, %N +  br i1 %exitcond, label %for.body, label %for.cond.cleanup +}  | 

