 llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 11 +
 llvm/test/Transforms/LoopLoadElim/cond-load.ll     | 42 ++
 2 files changed, 53 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index a0c5798df05..1dd60995237 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -119,6 +119,11 @@ bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
   });
 }
 
+/// \brief Return true if the load is not executed on all paths in the loop.
+static bool isLoadConditional(LoadInst *Load, Loop *L) {
+  return Load->getParent() != L->getHeader();
+}
+
 /// \brief The per-loop class that does most of the work.
 class LoadEliminationForLoop {
 public:
@@ -450,6 +455,12 @@ public:
     if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
       continue;
 
+    // If the load is conditional we can't hoist its 0-iteration instance to
+    // the preheader because that would make it unconditional. Thus we would
+    // access a memory location that the original loop did not access.
+    if (isLoadConditional(Cand.Load, L))
+      continue;
+
     // Check whether the SCEV difference is the same as the induction step,
     // thus we load the value in the next iteration.
     if (!Cand.isDependenceDistanceOfOne(PSE, L))
diff --git a/llvm/test/Transforms/LoopLoadElim/cond-load.ll b/llvm/test/Transforms/LoopLoadElim/cond-load.ll
new file mode 100644
index 00000000000..e337397e996
--- /dev/null
+++ b/llvm/test/Transforms/LoopLoadElim/cond-load.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -loop-load-elim < %s | FileCheck %s
+
+; We can't hoist conditional loads to the preheader for the initial value.
+; E.g. in the loop below we'd access array[-1] if we did:
+;
+;   for (int i = 0; i < n; i++)
+;     array[i] = (i > 0 ? array[i - 1] : 0) + 4;
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @f(i32* %array, i32 %n) {
+entry:
+  %cmp10 = icmp sgt i32 %n, 0
+  br i1 %cmp10, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %cond.end, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %cond.end
+  %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ]
+; CHECK-NOT: %store_forwarded = phi
+  %cmp1 = icmp sgt i64 %indvars.iv, 0
+  br i1 %cmp1, label %cond.true, label %cond.end
+
+cond.true:                                        ; preds = %for.body
+  %0 = add nsw i64 %indvars.iv, -1
+  %arrayidx = getelementptr inbounds i32, i32* %array, i64 %0
+  %1 = load i32, i32* %arrayidx, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %for.body, %cond.true
+  %cond = phi i32 [ %1, %cond.true ], [ 0, %for.body ]
+; CHECK: %cond = phi i32 [ %1, %cond.true ], [ 0, %for.body ]
+  %add = add nsw i32 %cond, 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %array, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx3, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
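
Context for the change (not part of the commit): store-to-load forwarding in LoopLoadElimination replaces a loop-carried load with the value stored one iteration earlier; the only memory access left for the load is its 0-iteration instance, which is hoisted into the preheader. Below is a minimal C-style sketch of that rewrite and of why the hoist is illegal for a conditional load; the loop shapes and names (A, B, fwd) are illustrative only, loosely following the example in the pass's file comment and the test above.

#include <stddef.h>

/* Before: the load A[i] reads the value stored to A[i+1] in the previous
 * iteration, i.e. the dependence distance between store and load is one. */
void before(int *A, const int *B, size_t n) {
  for (size_t i = 0; i < n; i++)
    A[i + 1] = A[i] * B[i];
}

/* After forwarding: the stored value is carried in a scalar; the only load
 * left is the 0-iteration instance, hoisted (unconditionally) into the
 * preheader. */
void after(int *A, const int *B, size_t n) {
  int fwd = A[0];                 /* initial value, loaded in the preheader
                                     (assuming the loop is entered) */
  for (size_t i = 0; i < n; i++) {
    fwd = fwd * B[i];             /* value forwarded from last iteration's store */
    A[i + 1] = fwd;
  }
}

/* In the test case the load is guarded by (i > 0), so hoisting its
 * 0-iteration instance the same way would read array[-1], a location the
 * original loop never touches; the new isLoadConditional() check makes the
 * pass skip such candidates instead. */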