summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp52
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/pr36524.ll39
2 files changed, 76 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dab24ef038d..ba07ebe1e51 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -542,9 +542,9 @@ protected:
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
/// variable on which to base the steps, \p Step is the size of the step, and
/// \p EntryVal is the value from the original loop that maps to the steps.
- /// Note that \p EntryVal doesn't have to be an induction variable (e.g., it
- /// can be a truncate instruction).
- void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal,
+ /// Note that \p EntryVal doesn't have to be an induction variable - it
+ /// can also be a truncate instruction.
+ void buildScalarSteps(Value *ScalarIV, Value *Step, Instruction *EntryVal,
const InductionDescriptor &ID);
/// Create a vector induction phi node based on an existing scalar one. \p
@@ -571,10 +571,20 @@ protected:
/// vector loop for both the Phi and the cast.
/// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,
/// Otherwise, \p VectorLoopValue is a widened/vectorized value.
- void recordVectorLoopValueForInductionCast (const InductionDescriptor &ID,
- Value *VectorLoopValue,
- unsigned Part,
- unsigned Lane = UINT_MAX);
+ ///
+ /// \p EntryVal is the value from the original loop that maps to the vector
+ /// phi node and is used to distinguish what is the IV currently being
+ /// processed - original one (if \p EntryVal is a phi corresponding to the
+ /// original IV) or the "newly-created" one based on the proof mentioned above
+ /// (see also buildScalarSteps() and createVectorIntOrFPInductionPHI()). In the
+ /// latter case \p EntryVal is a TruncInst and we must not record anything for
+ /// that IV, but it's error-prone to expect callers of this routine to care
+ /// about that, hence this explicit parameter.
+ void recordVectorLoopValueForInductionCast(const InductionDescriptor &ID,
+ const Instruction *EntryVal,
+ Value *VectorLoopValue,
+ unsigned Part,
+ unsigned Lane = UINT_MAX);
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
@@ -2368,6 +2378,8 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
const InductionDescriptor &II, Value *Step, Instruction *EntryVal) {
+ assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
+ "Expected either an induction phi-node or a truncate of it!");
Value *Start = II.getStartValue();
// Construct the initial value of the vector IV in the vector loop preheader
@@ -2421,8 +2433,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
- else
- recordVectorLoopValueForInductionCast(II, LastInduction, Part);
+ recordVectorLoopValueForInductionCast(II, EntryVal, LastInduction, Part);
LastInduction = cast<Instruction>(addFastMathFlag(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
@@ -2456,8 +2467,20 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
}
void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
- const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part,
- unsigned Lane) {
+ const InductionDescriptor &ID, const Instruction *EntryVal,
+ Value *VectorLoopVal, unsigned Part, unsigned Lane) {
+ assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
+ "Expected either an induction phi-node or a truncate of it!");
+
+ // This induction variable is not the phi from the original loop but the
+ // newly-created IV based on the proof that casted Phi is equal to the
+ // uncasted Phi in the vectorized loop (under a runtime guard possibly). It
+ // re-uses the same InductionDescriptor that original IV uses but we don't
+ // have to do any recording in this case - that is done when original IV is
+ // processed.
+ if (isa<TruncInst>(EntryVal))
+ return;
+
const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
if (Casts.empty())
return;
@@ -2554,8 +2577,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
if (Trunc)
addMetadata(EntryPart, Trunc);
- else
- recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
+ recordVectorLoopValueForInductionCast(ID, EntryVal, EntryPart, Part);
}
}
@@ -2626,7 +2648,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
- Value *EntryVal,
+ Instruction *EntryVal,
const InductionDescriptor &ID) {
// We shouldn't have to build scalar steps if we aren't vectorizing.
assert(VF > 1 && "VF should be greater than one");
@@ -2661,7 +2683,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
- recordVectorLoopValueForInductionCast(ID, Add, Part, Lane);
+ recordVectorLoopValueForInductionCast(ID, EntryVal, Add, Part, Lane);
}
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
new file mode 100644
index 00000000000..1f8464296f3
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -scev-version-unknown -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 2, [[INDEX]]
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[OFFSET_IDX1]] to i32
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP11]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 0
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+;
+entry:
+ br label %loop
+
+loop:
+ %0 = phi i64 [ 2, %entry ], [ %3, %loop ]
+ %1 = and i64 %0, 4294967295
+ %2 = trunc i64 %0 to i32
+ %3 = add nuw nsw i64 %1, 1
+ %4 = icmp sgt i32 %2, 80
+ br i1 %4, label %exit, label %loop
+
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud