summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-05-15 01:44:30 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-05-15 01:44:30 +0000
commit2d920477a4164a421e0a70f68b1354f040b92d92 (patch)
treea0583bc1ef6ba33be1e44190b62ecf3291e947a6
parentb78d73fe7612ceab5b0c044c038daa2bf85782f4 (diff)
downloadbcm5719-llvm-2d920477a4164a421e0a70f68b1354f040b92d92.tar.gz
bcm5719-llvm-2d920477a4164a421e0a70f68b1354f040b92d92.zip
LoopVectorize: Hoist conditional loads if possible
InstCombine can be uncooperative to vectorization and sink loads into conditional blocks. This prevents vectorization. Undo this optimization if there are unconditional memory accesses to the same addresses in the loop. radar://13815763 llvm-svn: 181860
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp105
-rw-r--r--llvm/test/Transforms/LoopVectorize/hoist-loads.ll69
2 files changed, 171 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0b29445dd01..a8e1f4579c7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -318,6 +318,93 @@ private:
ValueMap WidenMap;
};
+/// \brief Check if conditionally executed loads are hoistable.
+///
+/// This class has two functions. isHoistableLoad and canHoistAllLoads.
+/// isHoistableLoad should be called on all load instructions that are executed
+/// conditionally. After all conditional loads are processed, the client should
+/// call canHoistAllLoads to determine if all of the conditional execute loads
+/// have an unconditional memory access in the loop.
+class LoadHoisting {
+ typedef SmallPtrSet<Value *, 8> MemorySet;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ MemorySet CondLoadAddrSet;
+
+public:
+ LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {}
+
+ /// \brief Check if the instruction is a load with a identifiable address.
+ bool isHoistableLoad(Instruction *L);
+
+ /// \brief Check if all of the conditional loads are hoistable because there
+ /// exists an unconditional memory access to the same address in the loop.
+ bool canHoistAllLoads();
+};
+
+bool LoadHoisting::isHoistableLoad(Instruction *L) {
+ LoadInst *LI = dyn_cast<LoadInst>(L);
+ if (!LI)
+ return false;
+
+ CondLoadAddrSet.insert(LI->getPointerOperand());
+ return true;
+}
+
+static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ Value *Addr = 0;
+
+ // Try a load.
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ if (LI) {
+ Addr = LI->getPointerOperand();
+ Set.insert(Addr);
+ continue;
+ }
+
+ // Try a store.
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ if (!SI)
+ continue;
+
+ Addr = SI->getPointerOperand();
+ Set.insert(Addr);
+ }
+}
+
+bool LoadHoisting::canHoistAllLoads() {
+ // No conditional loads.
+ if (CondLoadAddrSet.empty())
+ return true;
+
+ MemorySet UncondMemAccesses;
+ std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+ BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+
+ // Iterate over the unconditional blocks and collect memory access addresses.
+ for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+ BasicBlock *BB = LoopBlocks[i];
+
+ // Ignore conditional blocks.
+ if (BB != LoopLatch && !DT->dominates(BB, LoopLatch))
+ continue;
+
+ addMemAccesses(BB, UncondMemAccesses);
+ }
+
+ // And make sure there is a matching unconditional access for every
+ // conditional load.
+ for (MemorySet::iterator MI = CondLoadAddrSet.begin(),
+ ME = CondLoadAddrSet.end(); MI != ME; ++MI)
+ if (!UncondMemAccesses.count(*MI))
+ return false;
+
+ return true;
+}
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -337,7 +424,8 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {}
+ Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
+ LoadSpeculation(L, DT) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -598,6 +686,9 @@ private:
RuntimePointerCheck PtrRtCheck;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
+
+ /// Utility to determine whether loads can be speculated.
+ LoadHoisting LoadSpeculation;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -3259,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- // We don't predicate loads/stores at the moment.
- if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ // We might be able to hoist the load.
+ if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it))
+ return false;
+
+ // We predicate stores at the moment.
+ if (it->mayWriteToMemory() || it->mayThrow())
return false;
// The instructions below can trap.
@@ -3274,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
}
}
+ // Check that we can actually speculate the hoistable loads.
+ if (!LoadSpeculation.canHoistAllLoads())
+ return false;
+
return true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
new file mode 100644
index 00000000000..fad17350cd0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -0,0 +1,69 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+; Make sure we can vectorize in the presence of hoistable conditional loads.
+; CHECK: hoist_cond_load
+; CHECK: load <2 x float>
+
+define void @hoist_cond_load() {
+entry:
+ br label %for.body
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+ %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+ %0 = load float* %arrayidx2, align 4
+ %cmp3 = fcmp oeq float %0, 0.000000e+00
+ br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+ %1 = load float* %arrayidx, align 4
+ br label %if.end9
+
+if.end9:
+ %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+ store float %tmp.0, float* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; However, we can't hoist loads whose address we have not seen unconditionally
+; accessed.
+; CHECK: dont_hoist_cond_load
+; CHECK-NOT: load <2 x float>
+
+define void @dont_hoist_cond_load() {
+entry:
+ br label %for.body
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+ %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+ %0 = load float* %arrayidx2, align 4
+ %cmp3 = fcmp oeq float %0, 0.000000e+00
+ br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+ %1 = load float* %arrayidx, align 4
+ br label %if.end9
+
+if.end9:
+ %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+ store float %tmp.0, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
OpenPOWER on IntegriCloud