2 files changed, 119 insertions, 49 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c6b0f7b8d71..f2f9bae0c8a 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/IR/Dominators.h"
@@ -653,45 +654,77 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
 }
 
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
-                                         ConstantInt *Step)
-  : StartValue(Start), IK(K), StepValue(Step) {
+                                         const SCEV *Step)
+  : StartValue(Start), IK(K), Step(Step) {
   assert(IK != IK_NoInduction && "Not an induction");
+
+  // Start value type should match the induction kind and the value
+  // itself should not be null.
   assert(StartValue && "StartValue is null");
-  assert(StepValue && !StepValue->isZero() && "StepValue is zero");
   assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
          "StartValue is not a pointer for pointer induction");
   assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
          "StartValue is not an integer for integer induction");
-  assert(StepValue->getType()->isIntegerTy() &&
-         "StepValue is not an integer");
+
+  // Check the Step Value. It should be non-zero integer value.
+  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+         "Step value is zero");
+
+  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+         "Step value should be constant for pointer induction");
+  assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
 }
 
 int InductionDescriptor::getConsecutiveDirection() const {
-  if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
-    return StepValue->getSExtValue();
+  ConstantInt *ConstStep = getConstIntStepValue();
+  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+    return ConstStep->getSExtValue();
   return 0;
 }
 
-Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const {
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+  if (isa<SCEVConstant>(Step))
+    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+  return nullptr;
+}
+
+Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
+                                      ScalarEvolution *SE,
+                                      const DataLayout& DL) const {
+
+  SCEVExpander Exp(*SE, DL, "induction");
   switch (IK) {
-  case IK_IntInduction:
+  case IK_IntInduction: {
     assert(Index->getType() == StartValue->getType() &&
            "Index type does not match StartValue type");
-    if (StepValue->isMinusOne())
-      return B.CreateSub(StartValue, Index);
-    if (!StepValue->isOne())
-      Index = B.CreateMul(Index, StepValue);
-    return B.CreateAdd(StartValue, Index);
 
-  case IK_PtrInduction:
-    assert(Index->getType() == StepValue->getType() &&
+    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
+    // and calculate (Start + Index * Step) for all cases, without
+    // special handling for "isOne" and "isMinusOne".
+    // But in the real life the result code getting worse. We mix SCEV
+    // expressions and ADD/SUB operations and receive redundant
+    // intermediate values being calculated in different ways and
+    // Instcombine is unable to reduce them all.
+
+    if (getConstIntStepValue() &&
+        getConstIntStepValue()->isMinusOne())
+      return B.CreateSub(StartValue, Index);
+    if (getConstIntStepValue() &&
+        getConstIntStepValue()->isOne())
+      return B.CreateAdd(StartValue, Index);
+    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
+                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
+    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
+  }
+  case IK_PtrInduction: {
+    assert(Index->getType() == Step->getType() &&
            "Index type does not match StepValue type");
-    if (StepValue->isMinusOne())
-      Index = B.CreateNeg(Index);
-    else if (!StepValue->isOne())
-      Index = B.CreateMul(Index, StepValue);
+    assert(isa<SCEVConstant>(Step) &&
+           "Expected constant step for pointer induction");
+    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
+    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
     return B.CreateGEP(nullptr, StartValue, Index);
-
+  }
   case IK_NoInduction:
     return nullptr;
   }
@@ -746,17 +779,22 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
     Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
   const SCEV *Step = AR->getStepRecurrence(*SE);
   // Calculate the pointer stride and check if it is consecutive.
-  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
-  if (!C)
+  // The stride may be a constant or a loop invariant integer value.
+  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+  if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
     return false;
 
-  ConstantInt *CV = C->getValue();
   if (PhiTy->isIntegerTy()) {
-    D = InductionDescriptor(StartValue, IK_IntInduction, CV);
+    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
     return true;
   }
 
   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+  // Pointer induction should be a constant.
+  if (!ConstStep)
+    return false;
+
+  ConstantInt *CV = ConstStep->getValue();
   Type *PointerElementType = PhiTy->getPointerElementType();
   // The pointer stride cannot be determined if the pointer element type is not
   // sized.
@@ -771,8 +809,8 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
   int64_t CVSize = CV->getSExtValue();
   if (CVSize % Size)
     return false;
-  auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
-
+  auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
+                                    true /* signed */);
   D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
   return true;
 }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8284856c2cf..83b0233bc2d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -416,6 +416,12 @@ protected:
   /// to each vector element of Val. The sequence starts at StartIndex.
   virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
 
+  /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
+  /// to each vector element of Val. The sequence starts at StartIndex.
+  /// Step is a SCEV. In order to get StepValue it takes the existing value
+  /// from SCEV or creates a new using SCEVExpander.
+  virtual Value *getStepVector(Value *Val, int StartIdx, const SCEV *Step);
+
   /// When we go over instructions in the basic block we rely on previous
   /// values within the current basic block or on loop invariant values.
   /// When we widen (vectorize) values we place them in the map. If the values
@@ -600,6 +606,7 @@ private:
   void vectorizeMemoryInstruction(Instruction *Instr) override;
   Value *getBroadcastInstrs(Value *V) override;
   Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
+  Value *getStepVector(Value *Val, int StartIdx, const SCEV *StepSCEV) override;
   Value *reverseVector(Value *Vec) override;
 };
 
@@ -2076,6 +2083,15 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
 }
 
 Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
+                                          const SCEV *StepSCEV) {
+  const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+  SCEVExpander Exp(*PSE.getSE(), DL, "induction");
+  Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(),
+                                       &*Builder.GetInsertPoint());
+  return getStepVector(Val, StartIdx, StepValue);
+}
+
+Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
                                           Value *Step) {
   assert(Val->getType()->isVectorTy() && "Must be a vector");
   assert(Val->getType()->getScalarType()->isIntegerTy() &&
@@ -3141,9 +3157,10 @@ void InnerLoopVectorizer::createEmptyLoop() {
       EndValue = CountRoundDown;
     } else {
       IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
-      Value *CRD = B.CreateSExtOrTrunc(
-          CountRoundDown, II.getStepValue()->getType(), "cast.crd");
-      EndValue = II.transform(B, CRD);
+      Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
+                                       II.getStep()->getType(), "cast.crd");
+      const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+      EndValue = II.transform(B, CRD, PSE.getSE(), DL);
       EndValue->setName("ind.end");
     }
 
@@ -4035,6 +4052,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
   assert(Legal->getInductionVars()->count(P) && "Not an induction variable");
 
   InductionDescriptor II = Legal->getInductionVars()->lookup(P);
+  const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
 
   // FIXME: The newly created binary instructions should contain nsw/nuw flags,
   // which can be found from the original scalar operations.
@@ -4048,14 +4066,14 @@ void InnerLoopVectorizer::widenPHIInstruction(
     Value *V = Induction;
     if (P != OldInduction) {
       V = Builder.CreateSExtOrTrunc(Induction, P->getType());
-      V = II.transform(Builder, V);
+      V = II.transform(Builder, V, PSE.getSE(), DL);
       V->setName("offset.idx");
     }
     Value *Broadcasted = getBroadcastInstrs(V);
     // After broadcasting the induction variable we need to make the vector
     // consecutive by adding 0, 1, 2, etc.
     for (unsigned part = 0; part < UF; ++part)
-      Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
+      Entry[part] = getStepVector(Broadcasted, VF * part, II.getStep());
     return;
   }
   case InductionDescriptor::IK_PtrInduction:
@@ -4063,7 +4081,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
     assert(P->getType()->isPointerTy() && "Unexpected type.");
     // This is the normalized GEP that starts counting at zero.
     Value *PtrInd = Induction;
-    PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
+    PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType());
     // This is the vector of results. Notice that we don't generate
     // vector geps because scalar geps result in better code.
     for (unsigned part = 0; part < UF; ++part) {
@@ -4071,7 +4089,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
         int EltIndex = part;
         Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
         Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
-        Value *SclrGep = II.transform(Builder, GlobalIdx);
+        Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
         SclrGep->setName("next.gep");
         Entry[part] = SclrGep;
         continue;
@@ -4082,7 +4100,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
         int EltIndex = i + part * VF;
         Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
         Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
-        Value *SclrGep = II.transform(Builder, GlobalIdx);
+        Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
         SclrGep->setName("next.gep");
         VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
                                              Builder.getInt32(i), "insert.gep");
@@ -4218,23 +4236,26 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
     case Instruction::BitCast: {
       CastInst *CI = dyn_cast<CastInst>(it);
       setDebugLocFromInst(Builder, &*it);
-      /// Optimize the special case where the source is the induction
-      /// variable. Notice that we can only optimize the 'trunc' case
+      /// Optimize the special case where the source is a constant integer
+      /// induction variable. Notice that we can only optimize the 'trunc' case
       /// because: a. FP conversions lose precision, b. sext/zext may wrap,
       /// c. other casts depend on pointer size.
+
       if (CI->getOperand(0) == OldInduction &&
           it->getOpcode() == Instruction::Trunc) {
-        Value *ScalarCast =
-            Builder.CreateCast(CI->getOpcode(), Induction, CI->getType());
-        Value *Broadcasted = getBroadcastInstrs(ScalarCast);
         InductionDescriptor II =
-            Legal->getInductionVars()->lookup(OldInduction);
-        Constant *Step = ConstantInt::getSigned(
-            CI->getType(), II.getStepValue()->getSExtValue());
-        for (unsigned Part = 0; Part < UF; ++Part)
-          Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
-        addMetadata(Entry, &*it);
-        break;
+          Legal->getInductionVars()->lookup(OldInduction);
+        if (auto StepValue = II.getConstIntStepValue()) {
+          StepValue = ConstantInt::getSigned(cast<IntegerType>(CI->getType()),
+                                             StepValue->getSExtValue());
+          Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
+                                                 CI->getType());
+          Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+          for (unsigned Part = 0; Part < UF; ++Part)
+            Entry[Part] = getStepVector(Broadcasted, VF * Part, StepValue);
+          addMetadata(Entry, &*it);
+          break;
+        }
       }
       /// Vectorize casts.
       Type *DestTy =
@@ -4596,9 +4617,11 @@ bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
 
   // Int inductions are special because we only allow one IV.
   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
-      ID.getStepValue()->isOne() &&
+      ID.getConstIntStepValue() &&
+      ID.getConstIntStepValue()->isOne() &&
       isa<Constant>(ID.getStartValue()) &&
-        cast<Constant>(ID.getStartValue())->isNullValue()) {
+      cast<Constant>(ID.getStartValue())->isNullValue()) {
+
     // Use the phi node with the widest type as induction. Use the last
     // one if there are multiple (no good reason for doing this other
     // than it is expedient). We've checked that it begins at zero and
@@ -6235,6 +6258,15 @@ Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
 
 Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
 
+Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx,
+                                        const SCEV *StepSCEV) {
+  const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+  SCEVExpander Exp(*PSE.getSE(), DL, "induction");
+  Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(),
+                                       &*Builder.GetInsertPoint());
+  return getStepVector(Val, StartIdx, StepValue);
+}
+
 Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
   // When unrolling and the VF is 1, we only need to add a simple scalar.
   Type *ITy = Val->getType();