 llvm/lib/Transforms/Scalar/LICM.cpp       |  45 +++++
 llvm/test/Transforms/LICM/hoist-alloca.ll | 167 ++++++++++
 2 files changed, 212 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index c239edaf666..48fc9553fdd 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -789,6 +789,41 @@ public:
 };
 } // namespace
+
+/// Return true if we know how to rewrite all uses of the given alloca after
+/// hoisting it out of the loop. The main concerns are a) potential captures
+/// and b) invariant.start markers which don't capture, but are no longer
+/// valid without a corresponding invariant.end.
+static bool canRewriteUsesOfAlloca(AllocaInst &AI) {
+  // TODO: This looks a lot like capture tracking, but we need to remove any
+  // invariant starts if we extend the lifetime of the alloca by hoisting it.
+  // We should probably refactor capture tracking into a form which allows us
+  // to reuse the relevant bits and remove the duplicated logic here.
+
+  SmallVector<Use *, 16> Worklist;
+  for (Use &U : AI.uses())
+    Worklist.push_back(&U);
+
+  unsigned NumUsesExplored = 0;
+  while (!Worklist.empty()) {
+    Use *U = Worklist.pop_back_val();
+    Instruction *I = cast<Instruction>(U->getUser());
+    NumUsesExplored++;
+    if (NumUsesExplored > DefaultMaxUsesToExplore)
+      return false;
+    // Non-capturing, terminating uses.
+    if (isa<LoadInst>(I) ||
+        (isa<StoreInst>(I) && U->getOperandNo() == 1))
+      continue;
+    // Non-capturing, non-terminating uses: follow uses of the derived pointer.
+    if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
+      return false;
+    for (Use &U : I->uses())
+      Worklist.push_back(&U);
+  }
+  return true;
+}
+
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -909,6 +944,16 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
         continue;
       }
 
+      if (isa<AllocaInst>(&I) &&
+          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+          canRewriteUsesOfAlloca(cast<AllocaInst>(I))) {
+        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+              MSSAU, SE, ORE);
+        HoistedInstructions.push_back(&I);
+        Changed = true;
+        continue;
+      }
+
       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
         if (CFH.canHoistPHI(PN)) {
           // Redirect incoming blocks first to ensure that we create hoisted
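
The worklist walk in canRewriteUsesOfAlloca above is easier to follow outside diff markup. Below is a minimal standalone sketch of the same algorithm in plain C++; the Op enum, Inst struct, and MaxUsesToExplore constant are illustrative stand-ins for LLVM's real Use/Instruction machinery, not its actual API. The idea: starting from the alloca's uses, accept loads and stores *to* the slot as terminating, recurse through address-only computations (bitcast/GEP), and treat anything else as a potential capture.

#include <utility>
#include <vector>

// Toy stand-ins for LLVM's instruction hierarchy (illustrative only).
enum class Op { Load, Store, BitCast, GEP, Call };

struct Inst {
  Op Kind;
  // (user, operand index) pairs, mirroring llvm::Use: which instructions
  // consume this value, and in which operand slot.
  std::vector<std::pair<Inst *, unsigned>> Uses;
};

// Stand-in for CaptureTracking's DefaultMaxUsesToExplore cut-off.
constexpr unsigned MaxUsesToExplore = 20;

// Mirrors canRewriteUsesOfAlloca: true if every transitive use of the
// alloca can be rewritten after hoisting it out of the loop.
bool canRewriteUses(Inst &Alloca) {
  std::vector<std::pair<Inst *, unsigned>> Worklist(Alloca.Uses.begin(),
                                                    Alloca.Uses.end());
  unsigned NumUsesExplored = 0;
  while (!Worklist.empty()) {
    auto [I, OpNo] = Worklist.back();
    Worklist.pop_back();
    if (++NumUsesExplored > MaxUsesToExplore)
      return false; // too many uses to reason about cheaply; be conservative
    // Loads of the slot, and stores where the slot is only the address
    // (operand 1), neither capture nor propagate the pointer: terminating.
    if (I->Kind == Op::Load || (I->Kind == Op::Store && OpNo == 1))
      continue;
    // Only pure address computations may propagate the pointer onward.
    if (I->Kind != Op::BitCast && I->Kind != Op::GEP)
      return false; // e.g. a call: the pointer may be captured
    // Uses of the derived pointer must satisfy the same rules.
    for (auto &U : I->Uses)
      Worklist.push_back(U);
  }
  return true;
}

On test4 in the new test file below, the call to @capture is neither a load, a store address, nor a bitcast/GEP, so the walk returns false and the alloca stays in the loop.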
diff --git a/llvm/test/Transforms/LICM/hoist-alloca.ll b/llvm/test/Transforms/LICM/hoist-alloca.ll
new file mode 100644
index 00000000000..8e4debac283
--- /dev/null
+++ b/llvm/test/Transforms/LICM/hoist-alloca.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -licm < %s | FileCheck %s
+
+@G = external global i64
+
+define void @test(i64 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[A]]
+; CHECK-NEXT: store i64 [[VAL]], i64* @G
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ %val = load i64, i64* %a
+ store i64 %val, i64* @G
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+define void @test2(i64 %n) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], [[FOR_BODY]] ]
+; CHECK-NEXT: store i64 [[IV_LCSSA]], i64* [[A]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ store i64 %iv, i64* %a
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+
+define void @test3(i64 %n) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: [[A_I8:%.*]] = bitcast i64* [[A]] to i8*
+; CHECK-NEXT: [[A_OFFSET:%.*]] = getelementptr i8, i8* [[A_I8]], i64 4
+; CHECK-NEXT: store i8 0, i8* [[A_OFFSET]]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ %a.i8 = bitcast i64* %a to i8*
+ %a.offset = getelementptr i8, i8* %a.i8, i64 4
+ store i8 0, i8* %a.offset
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+; This example is subtle. Because a dynamic alloca isn't reclaimed until the
+; end of the function, a captured pointer can legally refer to the stack
+; region of an alloca from a previous iteration, so we must not hoist.
+define void @test4(i64 %n) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: store i64 [[IV]], i64* [[A]]
+; CHECK-NEXT: call void @capture(i64* [[A]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ store i64 %iv, i64* %a
+ call void @capture(i64* %a)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+declare void @capture(i64* %a)
+
+
+; TODO: not yet handled -- hoisting would also require removing the invariant.start
+define void @test5(i64 %n) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: store i64 [[IV]], i64* [[A]]
+; CHECK-NEXT: [[A_I8:%.*]] = bitcast i64* [[A]] to i8*
+; CHECK-NEXT: [[TMP0:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 8, i8* [[A_I8]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ store i64 %iv, i64* %a
+ %a.i8 = bitcast i64* %a to i8*
+ call {}* @llvm.invariant.start.p0i8(i64 8, i8* %a.i8)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+
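
test5 above exercises the missing piece named in canRewriteUsesOfAlloca's TODO: invariant.start does not capture, but its guarantee would no longer match the hoisted alloca's widened lifetime, so the marker would have to be deleted as part of the rewrite. The following is a hedged sketch of what that cleanup could look like; it uses real LLVM APIs, but the helper name removeInvariantStarts is hypothetical, and it sidesteps invariant.end by only touching markers whose token result is unused.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Hypothetical helper (not part of this patch): erase invariant.start
// markers reachable from AI through address-only computations, to be run
// once AI has been hoisted. Markers whose token is used (e.g. by a matching
// invariant.end) are left alone; a caller would have to refuse the hoist in
// that case, as the walk in the patch does today.
static void removeInvariantStarts(AllocaInst &AI) {
  SmallVector<IntrinsicInst *, 4> ToErase;
  SmallVector<Value *, 8> Worklist;
  Worklist.push_back(&AI);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    for (User *U : V->users()) {
      if (auto *II = dyn_cast<IntrinsicInst>(U)) {
        // Collect invariant.start markers whose token is never consumed.
        if (II->getIntrinsicID() == Intrinsic::invariant_start &&
            II->use_empty())
          ToErase.push_back(II);
      } else if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U)) {
        // Follow derived pointers, matching the bitcast/GEP cases the
        // patch's use walk already recurses through.
        Worklist.push_back(U);
      }
    }
  }
  for (IntrinsicInst *II : ToErase)
    II->eraseFromParent();
}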