summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp97
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll50
2 files changed, 121 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2e55c4fc75e..0463f97e315 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5007,38 +5007,83 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
void LoopVectorizationLegality::collectLoopUniforms() {
// We now know that the loop is vectorizable!
// Collect variables that will remain uniform after vectorization.
- std::vector<Value *> Worklist;
- BasicBlock *Latch = TheLoop->getLoopLatch();
- // Start with the conditional branch and walk up the block.
- Worklist.push_back(Latch->getTerminator()->getOperand(0));
+ // If V is not an instruction inside the current loop, it is a Value
+ // outside of the scope which we are interesting in.
+ auto isOutOfScope = [&](Value *V) -> bool {
+ Instruction *I = dyn_cast<Instruction>(V);
+ return (!I || !TheLoop->contains(I));
+ };
+
+ SetVector<Instruction *> Worklist;
+ BasicBlock *Latch = TheLoop->getLoopLatch();
+ // Start with the conditional branch.
+ if (!isOutOfScope(Latch->getTerminator()->getOperand(0))) {
+ Instruction *Cmp = cast<Instruction>(Latch->getTerminator()->getOperand(0));
+ Worklist.insert(Cmp);
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n");
+ }
// Also add all consecutive pointer values; these values will be uniform
- // after vectorization (and subsequent cleanup) and, until revectorization is
- // supported, all dependencies must also be uniform.
- for (Loop::block_iterator B = TheLoop->block_begin(),
- BE = TheLoop->block_end();
- B != BE; ++B)
- for (Instruction &I : **B)
- if (I.getType()->isPointerTy() && isConsecutivePtr(&I))
- Worklist.insert(Worklist.end(), I.op_begin(), I.op_end());
-
- while (!Worklist.empty()) {
- Instruction *I = dyn_cast<Instruction>(Worklist.back());
- Worklist.pop_back();
-
- // Look at instructions inside this loop.
- // Stop when reaching PHI nodes.
- // TODO: we need to follow values all over the loop, not only in this block.
- if (!I || !TheLoop->contains(I) || isa<PHINode>(I))
- continue;
+ // after vectorization (and subsequent cleanup).
+ for (auto *BB : TheLoop->getBlocks()) {
+ for (auto &I : *BB) {
+ if (I.getType()->isPointerTy() && isConsecutivePtr(&I)) {
+ Worklist.insert(&I);
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << I << "\n");
+ }
+ }
+ }
- // This is a known uniform.
- Uniforms.insert(I);
+ // Expand Worklist in topological order: whenever a new instruction
+ // is added , its users should be either already inside Worklist, or
+ // out of scope. It ensures a uniform instruction will only be used
+ // by uniform instructions or out of scope instructions.
+ unsigned idx = 0;
+ do {
+ Instruction *I = Worklist[idx++];
- // Insert all operands.
- Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
+ for (auto OV : I->operand_values()) {
+ if (isOutOfScope(OV))
+ continue;
+ Instruction *OI = cast<Instruction>(OV);
+ if (std::all_of(OI->user_begin(), OI->user_end(), [&](User *U) -> bool {
+ return isOutOfScope(U) || Worklist.count(cast<Instruction>(U));
+ })) {
+ Worklist.insert(OI);
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n");
+ }
+ }
+ } while (idx != Worklist.size());
+
+ // For an instruction to be added into Worklist above, all its users inside
+ // the current loop should be already added into Worklist. This condition
+ // cannot be true for phi instructions which is always in a dependence loop.
+ // Because any instruction in the dependence cycle always depends on others
+ // in the cycle to be added into Worklist first, the result is no ones in
+ // the cycle will be added into Worklist in the end.
+ // That is why we process PHI separately.
+ for (auto &Induction : *getInductionVars()) {
+ auto *PN = Induction.first;
+ auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
+ if (std::all_of(PN->user_begin(), PN->user_end(),
+ [&](User *U) -> bool {
+ return U == UpdateV || isOutOfScope(U) ||
+ Worklist.count(cast<Instruction>(U));
+ }) &&
+ std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
+ [&](User *U) -> bool {
+ return U == PN || isOutOfScope(U) ||
+ Worklist.count(cast<Instruction>(U));
+ })) {
+ Worklist.insert(cast<Instruction>(PN));
+ Worklist.insert(cast<Instruction>(UpdateV));
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *PN << "\n");
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *UpdateV << "\n");
+ }
}
+
+ Uniforms.insert(Worklist.begin(), Worklist.end());
}
bool LoopVectorizationLegality::canVectorizeMemory() {
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll
new file mode 100644
index 00000000000..1759cb81976
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform-phi.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: test
+; CHECK-DAG: LV: Found uniform instruction: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK-DAG: LV: Found uniform instruction: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK-DAG: LV: Found uniform instruction: %exitcond = icmp eq i64 %indvars.iv, 1599
+
+define void @test(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %tmp0 = load float, float* %arrayidx, align 4
+ %add = fadd float %tmp0, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ store float %add, float* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv, 1599
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; CHECK-LABEL: foo
+; CHECK-DAG: LV: Found uniform instruction: %cond = icmp eq i64 %i.next, %n
+; CHECK-DAG: LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i32 %tmp0
+; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+
+define void @foo(i32* %a, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %tmp0 = trunc i64 %i to i32
+ %tmp1 = getelementptr inbounds i32, i32* %a, i32 %tmp0
+ store i32 %tmp0, i32* %tmp1, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp eq i64 %i.next, %n
+ br i1 %cond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
OpenPOWER on IntegriCloud