summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Transforms/Utils/LoopUtils.h7
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp37
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp203
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll209
4 files changed, 450 insertions, 6 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 4cb01851203..222fe5e89d1 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -175,6 +175,13 @@ public:
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
RecurrenceDescriptor &RedDes);
+ /// Returns true if Phi is a first-order recurrence. A first-order recurrence
+ /// is a non-reduction recurrence relation in which the value of the
+ /// recurrence in the current loop iteration equals a value defined in the
+ /// previous iteration.
+ static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT);
+
RecurrenceKind getRecurrenceKind() { return Kind; }
MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 240fff0d822..42ba0124db1 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -520,6 +520,43 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
return false;
}
+bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT) {
+
+ // Ensure the phi node is in the loop header and has two incoming values.
+ if (Phi->getParent() != TheLoop->getHeader() ||
+ Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Ensure the loop has a preheader and a single latch block. The loop
+ // vectorizer will need the latch to set up the next iteration of the loop.
+ auto *Preheader = TheLoop->getLoopPreheader();
+ auto *Latch = TheLoop->getLoopLatch();
+ if (!Preheader || !Latch)
+ return false;
+
+ // Ensure the phi node's incoming blocks are the loop preheader and latch.
+ if (Phi->getBasicBlockIndex(Preheader) < 0 ||
+ Phi->getBasicBlockIndex(Latch) < 0)
+ return false;
+
+ // Get the previous value. The previous value comes from the latch edge while
+ // the initial value comes form the preheader edge.
+ auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+ if (!Previous)
+ return false;
+
+ // Ensure every user of the phi node is dominated by the previous value. The
+ // dominance requirement ensures the loop vectorizer will not need to
+ // vectorize the initial value prior to the first iteration of the loop.
+ for (User *U : Phi->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ if (!DT->dominates(Previous, I))
+ return false;
+
+ return true;
+}
+
/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e8af7051a32..997b52fb3dc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -364,6 +364,10 @@ protected:
/// Copy and widen the instructions from the old loop.
virtual void vectorizeLoop();
+ /// Fix a first-order recurrence. This is the second phase of vectorizing
+ /// this phi node.
+ void fixFirstOrderRecurrence(PHINode *Phi);
+
/// \brief The Loop exit block may have single value PHI nodes where the
/// incoming value is 'Undef'. While vectorizing we only handled real values
/// that were defined inside the loop. Here we fix the 'undef case'.
@@ -1201,6 +1205,10 @@ public:
/// induction descriptor.
typedef MapVector<PHINode*, InductionDescriptor> InductionList;
+ /// RecurrenceSet contains the phi nodes that are recurrences other than
+ /// inductions and reductions.
+ typedef SmallPtrSet<const PHINode *, 8> RecurrenceSet;
+
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
@@ -1215,6 +1223,9 @@ public:
/// Returns the induction variables found in the loop.
InductionList *getInductionVars() { return &Inductions; }
+ /// Return the first-order recurrences found in the loop.
+ RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
+
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
@@ -1224,6 +1235,9 @@ public:
/// Returns True if PN is a reduction variable in this loop.
bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
+ /// Returns True if Phi is a first-order recurrence in this loop.
+ bool isFirstOrderRecurrence(const PHINode *Phi);
+
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
bool blockNeedsPredication(BasicBlock *BB);
@@ -1384,6 +1398,8 @@ private:
/// Notice that inductions don't need to start at zero and that induction
/// variables can be pointers.
InductionList Inductions;
+ /// Holds the phi nodes that are first-order recurrences.
+ RecurrenceSet FirstOrderRecurrences;
/// Holds the widest induction type encountered.
Type *WidestIndTy;
@@ -3386,8 +3402,14 @@ void InnerLoopVectorizer::vectorizeLoop() {
for (PHINode *Phi : PHIsToFix) {
assert(Phi && "Unable to recover vectorized PHI");
- // We currently only handle reductions. Ensure the PHI node to be fixed is
- // a reduction, and get its reduction variable descriptor.
+ // Handle first-order recurrences that need to be fixed.
+ if (Legal->isFirstOrderRecurrence(Phi)) {
+ fixFirstOrderRecurrence(Phi);
+ continue;
+ }
+
+ // If the phi node is not a first-order recurrence, it must be a reduction.
+ // Get it's reduction variable descriptor.
assert(Legal->isReductionVariable(Phi) &&
"Unable to find the reduction variable");
RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[Phi];
@@ -3613,6 +3635,158 @@ void InnerLoopVectorizer::vectorizeLoop() {
cse(LoopVectorBody);
}
+void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
+
+ // This is the second phase of vectorizing first-order rececurrences. An
+ // overview of the transformation is described below. Suppose we have the
+ // following loop.
+ //
+ // for (int i = 0; i < n; ++i)
+ // b[i] = a[i] - a[i - 1];
+ //
+ // There is a first-order recurrence on "a". For this loop, the shorthand
+ // scalar IR looks like:
+ //
+ // scalar.ph:
+ // s_init = a[-1]
+ // br scalar.body
+ //
+ // scalar.body:
+ // i = phi [0, scalar.ph], [i+1, scalar.body]
+ // s1 = phi [s_init, scalar.ph], [s2, scalar.body]
+ // s2 = a[i]
+ // b[i] = s2 - s1
+ // br cond, scalar.body, ...
+ //
+ // In this example, s1 is a recurrence because it's value depends on the
+ // previous iteration. In the first phase of vectorization, we created a
+ // temporary value for s1. We now complete the vectorization and produce the
+ // shorthand vector IR shown below (for VF = 4, UF = 1).
+ //
+ // vector.ph:
+ // v_init = vector(..., ..., ..., a[-1])
+ // br vector.body
+ //
+ // vector.body
+ // i = phi [0, vector.ph], [i+4, vector.body]
+ // v1 = phi [v_init, vector.ph], [v2, vector.body]
+ // v2 = a[i, i+1, i+2, i+3];
+ // v3 = vector(v1(3), v2(0, 1, 2))
+ // b[i, i+1, i+2, i+3] = v2 - v3
+ // br cond, vector.body, middle.block
+ //
+ // middle.block:
+ // x = v2(3)
+ // br scalar.ph
+ //
+ // scalar.ph:
+ // s_init = phi [x, middle.block], [a[-1], otherwise]
+ // br scalar.body
+ //
+ // After execution completes the vector loop, we extract the next value of
+ // the recurrence (x) to use as the initial value in the scalar loop.
+
+ // Get the original loop preheader and single loop latch.
+ auto *Preheader = OrigLoop->getLoopPreheader();
+ auto *Latch = OrigLoop->getLoopLatch();
+
+ // Get the initial and previous values of the scalar recurrence.
+ auto *ScalarInit = Phi->getIncomingValueForBlock(Preheader);
+ auto *Previous = Phi->getIncomingValueForBlock(Latch);
+
+ // Create a vector from the initial value.
+ auto *VectorInit = ScalarInit;
+ if (VF > 1) {
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+ VectorInit = Builder.CreateInsertElement(
+ UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
+ Builder.getInt32(VF - 1), "vector.recur.init");
+ }
+
+ // We constructed a temporary phi node in the first phase of vectorization.
+ // This phi node will eventually be deleted.
+ auto &PhiParts = getVectorValue(Phi);
+ Builder.SetInsertPoint(cast<Instruction>(PhiParts[0]));
+
+ // Create a phi node for the new recurrence. The current value will either be
+ // the initial value inserted into a vector or loop-varying vector value.
+ auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur");
+ VecPhi->addIncoming(VectorInit, LoopVectorPreHeader);
+
+ // Get the vectorized previous value. We ensured the previous values was an
+ // instruction when detecting the recurrence.
+ auto &PreviousParts = getVectorValue(Previous);
+
+ // Set the insertion point to be after this instruction. We ensured the
+ // previous value dominated all uses of the phi when detecting the
+ // recurrence.
+ Builder.SetInsertPoint(
+ &*++BasicBlock::iterator(cast<Instruction>(PreviousParts[UF - 1])));
+
+ // We will construct a vector for the recurrence by combining the values for
+ // the current and previous iterations. This is the required shuffle mask.
+ SmallVector<Constant *, 8> ShuffleMask(VF);
+ ShuffleMask[0] = Builder.getInt32(VF - 1);
+ for (unsigned I = 1; I < VF; ++I)
+ ShuffleMask[I] = Builder.getInt32(I + VF - 1);
+
+ // The vector from which to take the initial value for the current iteration
+ // (actual or unrolled). Initially, this is the vector phi node.
+ Value *Incoming = VecPhi;
+
+ // Shuffle the current and previous vector and update the vector parts.
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ auto *Shuffle =
+ VF > 1
+ ? Builder.CreateShuffleVector(Incoming, PreviousParts[Part],
+ ConstantVector::get(ShuffleMask))
+ : Incoming;
+ PhiParts[Part]->replaceAllUsesWith(Shuffle);
+ cast<Instruction>(PhiParts[Part])->eraseFromParent();
+ PhiParts[Part] = Shuffle;
+ Incoming = PreviousParts[Part];
+ }
+
+ // Fix the latch value of the new recurrence in the vector loop.
+ VecPhi->addIncoming(Incoming,
+ LI->getLoopFor(LoopVectorBody[0])->getLoopLatch());
+
+ // Extract the last vector element in the middle block. This will be the
+ // initial value for the recurrence when jumping to the scalar loop.
+ auto *Extract = Incoming;
+ if (VF > 1) {
+ Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
+ Extract = Builder.CreateExtractElement(Extract, Builder.getInt32(VF - 1),
+ "vector.recur.extract");
+ }
+
+ // Fix the initial value of the original recurrence in the scalar loop.
+ Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
+ auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
+ for (auto *BB : predecessors(LoopScalarPreHeader)) {
+ auto *Incoming = BB == LoopMiddleBlock ? Extract : ScalarInit;
+ Start->addIncoming(Incoming, BB);
+ }
+
+ Phi->setIncomingValue(Phi->getBasicBlockIndex(LoopScalarPreHeader), Start);
+ Phi->setName("scalar.recur");
+
+ // Finally, fix users of the recurrence outside the loop. The users will need
+ // either the last value of the scalar recurrence or the last value of the
+ // vector recurrence we extracted in the middle block. Since the loop is in
+ // LCSSA form, we just need to find the phi node for the original scalar
+ // recurrence in the exit block, and then add an edge for the middle block.
+ for (auto &I : *LoopExitBlock) {
+ auto *LCSSAPhi = dyn_cast<PHINode>(&I);
+ if (!LCSSAPhi)
+ break;
+ if (LCSSAPhi->getIncomingValue(0) == Phi) {
+ LCSSAPhi->addIncoming(Extract, LoopMiddleBlock);
+ break;
+ }
+ }
+}
+
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
@@ -3687,8 +3861,8 @@ void InnerLoopVectorizer::widenPHIInstruction(
Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF,
unsigned VF, PhiVector *PV) {
PHINode* P = cast<PHINode>(PN);
- // Handle reduction variables:
- if (Legal->isReductionVariable(P)) {
+ // Handle recurrences.
+ if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
for (unsigned part = 0; part < UF; ++part) {
// This is phase one of vectorizing PHIs.
Type *VecTy = (VF == 1) ? PN->getType() :
@@ -4384,6 +4558,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, DT)) {
+ FirstOrderRecurrences.insert(Phi);
+ continue;
+ }
+
emitAnalysis(VectorizationReport(&*it) <<
"value that could not be identified as "
"reduction is used outside the loop");
@@ -4561,6 +4740,10 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
return Inductions.count(PN);
}
+bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
+ return FirstOrderRecurrences.count(Phi);
+}
+
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
@@ -5450,9 +5633,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::Br: {
return TTI.getCFInstrCost(I->getOpcode());
}
- case Instruction::PHI:
- //TODO: IF-converted IFs become selects.
+ case Instruction::PHI: {
+ auto *Phi = cast<PHINode>(I);
+
+ // First-order recurrences are replaced by vector shuffles inside the loop.
+ if (VF > 1 && Legal->isFirstOrderRecurrence(Phi))
+ return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+ VectorTy, VF - 1, VectorTy);
+
+ // TODO: IF-converted IFs become selects.
return 0;
+ }
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
new file mode 100644
index 00000000000..f556cd1319f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -0,0 +1,209 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @recurrence_1
+;
+; void recurrence_1(int *a, int *b, int n) {
+; for(int i = 0; i < n; i++)
+; b[i] = a[i] + a[i - 1]
+; }
+;
+; CHECK: vector.ph:
+; CHECK: %vector.recur.init = insertelement <4 x i32> undef, i32 %pre_load, i32 3
+;
+; CHECK: vector.body:
+; CHECK: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
+; CHECK: [[L1]] = load <4 x i32>
+; CHECK: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; CHECK: middle.block:
+; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
+;
+; CHECK: scalar.ph:
+; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %min.iters.checked ], [ %pre_load, %for.preheader ]
+;
+; CHECK: scalar.body:
+; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
+;
+; UNROLL: vector.body:
+; UNROLL: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
+; UNROLL: [[L1:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; UNROLL: [[L2]] = load <4 x i32>
+; UNROLL: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL: {{.*}} = shufflevector <4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; UNROLL: middle.block:
+; UNROLL: %vector.recur.extract = extractelement <4 x i32> [[L2]], i32 3
+;
+define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) {
+entry:
+ br label %for.preheader
+
+for.preheader:
+ %arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
+ %pre_load = load i32, i32* %arrayidx.phi.trans.insert
+ br label %scalar.body
+
+scalar.body:
+ %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
+ %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+ %1 = load i32, i32* %arrayidx32
+ %arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %add35 = add i32 %1, %0
+ store i32 %add35, i32* %arrayidx34
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.exit, label %scalar.body
+
+for.exit:
+ ret void
+}
+
+; CHECK-LABEL: @recurrence_2
+;
+; int recurrence_2(int *a, int n) {
+; int minmax;
+; for (int i = 0; i < n; ++i)
+; minmax = min(minmax, max(a[i] - a[i-1], 0));
+; return minmax;
+; }
+;
+; CHECK: vector.ph:
+; CHECK: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
+;
+; CHECK: vector.body:
+; CHECK: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
+; CHECK: [[L1]] = load <4 x i32>
+; CHECK: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; CHECK: middle.block:
+; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
+;
+; CHECK: scalar.ph:
+; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %min.iters.checked ], [ %.pre, %for.preheader ]
+;
+; CHECK: scalar.body:
+; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
+;
+; UNROLL: vector.body:
+; UNROLL: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
+; UNROLL: [[L1:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; UNROLL: [[L2]] = load <4 x i32>
+; UNROLL: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL: {{.*}} = shufflevector <4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; UNROLL: middle.block:
+; UNROLL: %vector.recur.extract = extractelement <4 x i32> [[L2]], i32 3
+;
+define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
+entry:
+ %cmp27 = icmp sgt i32 %n, 0
+ br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
+
+for.preheader:
+ %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
+ %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
+ br label %scalar.body
+
+for.cond.cleanup.loopexit:
+ %minmax.0.cond.lcssa = phi i32 [ %minmax.0.cond, %scalar.body ]
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %minmax.0.lcssa = phi i32 [ undef, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
+ ret i32 %minmax.0.lcssa
+
+scalar.body:
+ %0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
+ %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
+ %minmax.028 = phi i32 [ undef, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
+ %sub3 = sub nsw i32 %1, %0
+ %cmp4 = icmp sgt i32 %sub3, 0
+ %cond = select i1 %cmp4, i32 %sub3, i32 0
+ %cmp5 = icmp slt i32 %minmax.028, %cond
+ %minmax.0.cond = select i1 %cmp5, i32 %minmax.028, i32 %cond
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
+}
+
+; CHECK-LABEL: @recurrence_3
+;
+; void recurrence_3(short *a, double *b, int n, float f, short p) {
+; b[0] = (double)a[0] - f * (double)p;
+; for (int i = 1; i < n; i++)
+; b[i] = (double)a[i] - f * (double)a[i - 1];
+; }
+;
+;
+; CHECK: vector.ph:
+; CHECK: %vector.recur.init = insertelement <4 x i16> undef, i16 %0, i32 3
+;
+; CHECK: vector.body:
+; CHECK: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
+; CHECK: [[L1]] = load <4 x i16>
+; CHECK: {{.*}} = shufflevector <4 x i16> %vector.recur, <4 x i16> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; CHECK: middle.block:
+; CHECK: %vector.recur.extract = extractelement <4 x i16> [[L1]], i32 3
+;
+; CHECK: scalar.ph:
+; CHECK: %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %min.iters.checked ], [ %0, %for.preheader ]
+;
+; CHECK: scalar.body:
+; CHECK: %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
+;
+; UNROLL: vector.body:
+; UNROLL: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
+; UNROLL: [[L1:%[a-zA-Z0-9.]+]] = load <4 x i16>
+; UNROLL: [[L2]] = load <4 x i16>
+; UNROLL: {{.*}} = shufflevector <4 x i16> %vector.recur, <4 x i16> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL: {{.*}} = shufflevector <4 x i16> [[L1]], <4 x i16> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; UNROLL: middle.block:
+; UNROLL: %vector.recur.extract = extractelement <4 x i16> [[L2]], i32 3
+;
+define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 %n, float %f, i16 %p) {
+entry:
+ %0 = load i16, i16* %a, align 2
+ %conv = sitofp i16 %0 to double
+ %conv1 = fpext float %f to double
+ %conv2 = sitofp i16 %p to double
+ %mul = fmul fast double %conv2, %conv1
+ %sub = fsub fast double %conv, %mul
+ store double %sub, double* %b, align 8
+ %cmp25 = icmp sgt i32 %n, 1
+ br i1 %cmp25, label %for.preheader, label %for.end
+
+for.preheader:
+ br label %scalar.body
+
+scalar.body:
+ %1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
+ %advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
+ %arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %advars.iv
+ %2 = load i16, i16* %arrayidx5, align 2
+ %conv6 = sitofp i16 %2 to double
+ %conv11 = sitofp i16 %1 to double
+ %mul12 = fmul fast double %conv11, %conv1
+ %sub13 = fsub fast double %conv6, %mul12
+ %arrayidx15 = getelementptr inbounds double, double* %b, i64 %advars.iv
+ store double %sub13, double* %arrayidx15, align 8
+ %advars.iv.next = add nuw nsw i64 %advars.iv, 1
+ %lftr.wideiv = trunc i64 %advars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end.loopexit, label %scalar.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
OpenPOWER on IntegriCloud