 llvm/lib/Transforms/Scalar/LICM.cpp       |  45 +++++
 llvm/test/Transforms/LICM/hoist-alloca.ll | 167 ++++++++++
 2 files changed, 212 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index c239edaf666..48fc9553fdd 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -789,6 +789,41 @@ public:
 };
 } // namespace
+
+/// Return true if we know how to rewrite all uses of the given alloca after
+/// hoisting it out of the loop. The main concerns are a) potential captures
+/// and b) invariant.start markers which don't capture, but are no longer
+/// valid without a corresponding invariant.end.
+static bool canRewriteUsesOfAlloca(AllocaInst &AI) {
+  // TODO: This looks a lot like capture tracking, but we need to remove any
+  // invariant starts if we extend the lifetime of the alloca by hoisting it.
+  // We should probably refactor capture tracking into a form which allows us
+  // to reuse the relevant bits and remove the duplicated logic here.
+
+  SmallVector<Use *, 16> Worklist;
+  for (Use &U : AI.uses())
+    Worklist.push_back(&U);
+
+  unsigned NumUsesExplored = 0;
+  while (!Worklist.empty()) {
+    Use *U = Worklist.pop_back_val();
+    Instruction *I = cast<Instruction>(U->getUser());
+    NumUsesExplored++;
+    if (NumUsesExplored > DefaultMaxUsesToExplore)
+      return false;
+    // Non-capturing, terminating uses.
+    if (isa<LoadInst>(I) ||
+        (isa<StoreInst>(I) && U->getOperandNo() == 1))
+      continue;
+    // Non-capturing, non-terminating uses: follow uses of the derived pointer.
+    if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
+      return false;
+    for (Use &U : I->uses())
+      Worklist.push_back(&U);
+  }
+  return true;
+}
+
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -909,6 +944,16 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
         continue;
       }
 
+      if (isa<AllocaInst>(&I) &&
+          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+          canRewriteUsesOfAlloca(cast<AllocaInst>(I))) {
+        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+              MSSAU, SE, ORE);
+        HoistedInstructions.push_back(&I);
+        Changed = true;
+        continue;
+      }
+
       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
         if (CFH.canHoistPHI(PN)) {
           // Redirect incoming blocks first to ensure that we create hoisted
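
The worklist walk in canRewriteUsesOfAlloca above is easier to follow outside diff markup. Below is a minimal standalone sketch of the same algorithm in plain C++; the Op enum, Inst struct, and MaxUsesToExplore constant are illustrative stand-ins for LLVM's real Use/Instruction machinery, not its actual API. The idea: starting from the alloca's uses, accept loads and stores *to* the slot as terminating, recurse through address-only computations (bitcast/GEP), and treat anything else as a potential capture.

#include <utility>
#include <vector>

// Toy stand-ins for LLVM's instruction hierarchy (illustrative only).
enum class Op { Load, Store, BitCast, GEP, Call };

struct Inst {
  Op Kind;
  // (user, operand index) pairs, mirroring llvm::Use: which instructions
  // consume this value, and in which operand slot.
  std::vector<std::pair<Inst *, unsigned>> Uses;
};

// Stand-in for CaptureTracking's DefaultMaxUsesToExplore cut-off.
constexpr unsigned MaxUsesToExplore = 20;

// Mirrors canRewriteUsesOfAlloca: true if every transitive use of the
// alloca can be rewritten after hoisting it out of the loop.
bool canRewriteUses(Inst &Alloca) {
  std::vector<std::pair<Inst *, unsigned>> Worklist(Alloca.Uses.begin(),
                                                    Alloca.Uses.end());
  unsigned NumUsesExplored = 0;
  while (!Worklist.empty()) {
    auto [I, OpNo] = Worklist.back();
    Worklist.pop_back();
    if (++NumUsesExplored > MaxUsesToExplore)
      return false; // too many uses to reason about cheaply; be conservative
    // Loads of the slot, and stores where the slot is only the address
    // (operand 1), neither capture nor propagate the pointer: terminating.
    if (I->Kind == Op::Load || (I->Kind == Op::Store && OpNo == 1))
      continue;
    // Only pure address computations may propagate the pointer onward.
    if (I->Kind != Op::BitCast && I->Kind != Op::GEP)
      return false; // e.g. a call: the pointer may be captured
    // Uses of the derived pointer must satisfy the same rules.
    for (auto &U : I->Uses)
      Worklist.push_back(U);
  }
  return true;
}

On test4 in the new test file below, the call to @capture is neither a load, a store address, nor a bitcast/GEP, so the walk returns false and the alloca stays in the loop.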
diff --git a/llvm/test/Transforms/LICM/hoist-alloca.ll b/llvm/test/Transforms/LICM/hoist-alloca.ll
new file mode 100644
index 00000000000..8e4debac283
--- /dev/null
+++ b/llvm/test/Transforms/LICM/hoist-alloca.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -licm < %s | FileCheck %s
+
+@G = external global i64
+
+define void @test(i64 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[A]]
+; CHECK-NEXT: store i64 [[VAL]], i64* @G
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ %val = load i64, i64* %a
+ store i64 %val, i64* @G
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+define void @test2(i64 %n) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], [[FOR_BODY]] ]
+; CHECK-NEXT: store i64 [[IV_LCSSA]], i64* [[A]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ store i64 %iv, i64* %a
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+
+define void @test3(i64 %n) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: [[A_I8:%.*]] = bitcast i64* [[A]] to i8*
+; CHECK-NEXT: [[A_OFFSET:%.*]] = getelementptr i8, i8* [[A_I8]], i64 4
+; CHECK-NEXT: store i8 0, i8* [[A_OFFSET]]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ %a.i8 = bitcast i64* %a to i8*
+ %a.offset = getelementptr i8, i8* %a.i8, i64 4
+ store i8 0, i8* %a.offset
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+; This example is subtle. Because a dynamic alloca isn't reclaimed until the
+; end of the function, a captured pointer can legally refer to the stack
+; region of an alloca from a previous iteration, so we must not hoist.
+define void @test4(i64 %n) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: store i64 [[IV]], i64* [[A]]
+; CHECK-NEXT: call void @capture(i64* [[A]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ store i64 %iv, i64* %a
+ call void @capture(i64* %a)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+declare void @capture(i64* %a)
+
+
+; TODO: not yet handled -- hoisting would also require removing the invariant.start
+define void @test5(i64 %n) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A:%.*]] = alloca i64
+; CHECK-NEXT: store i64 [[IV]], i64* [[A]]
+; CHECK-NEXT: [[A_I8:%.*]] = bitcast i64* [[A]] to i8*
+; CHECK-NEXT: [[TMP0:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 8, i8* [[A_I8]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %a = alloca i64
+ store i64 %iv, i64* %a
+ %a.i8 = bitcast i64* %a to i8*
+ call {}* @llvm.invariant.start.p0i8(i64 8, i8* %a.i8)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp ult i64 %iv, %n
+ br i1 %exitcond, label %for.body, label %exit
+exit:
+ ret void
+}
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+
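
test5 above exercises the missing piece named in canRewriteUsesOfAlloca's TODO: invariant.start does not capture, but its guarantee would no longer match the hoisted alloca's widened lifetime, so the marker would have to be deleted as part of the rewrite. The following is a hedged sketch of what that cleanup could look like; it uses real LLVM APIs, but the helper name removeInvariantStarts is hypothetical, and it sidesteps invariant.end by only touching markers whose token result is unused.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Hypothetical helper (not part of this patch): erase invariant.start
// markers reachable from AI through address-only computations, to be run
// once AI has been hoisted. Markers whose token is used (e.g. by a matching
// invariant.end) are left alone; a caller would have to refuse the hoist in
// that case, as the walk in the patch does today.
static void removeInvariantStarts(AllocaInst &AI) {
  SmallVector<IntrinsicInst *, 4> ToErase;
  SmallVector<Value *, 8> Worklist;
  Worklist.push_back(&AI);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    for (User *U : V->users()) {
      if (auto *II = dyn_cast<IntrinsicInst>(U)) {
        // Collect invariant.start markers whose token is never consumed.
        if (II->getIntrinsicID() == Intrinsic::invariant_start &&
            II->use_empty())
          ToErase.push_back(II);
      } else if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U)) {
        // Follow derived pointers, matching the bitcast/GEP cases the
        // patch's use walk already recurses through.
        Worklist.push_back(U);
      }
    }
  }
  for (IntrinsicInst *II : ToErase)
    II->eraseFromParent();
}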