summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp28
-rw-r--r--llvm/test/Transforms/LICM/scalar_promote.ll192
2 files changed, 216 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index eb887af9cb5..8a29540a9eb 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -912,14 +912,23 @@ bool llvm::promoteLoopAccessesToScalars(
//
// If at least one store is guaranteed to execute, both properties are
// satisfied, and promotion is legal.
+ //
// This, however, is not a necessary condition. Even if no store/load is
- // guaranteed to execute, we can still establish these properties:
- // (p1) by proving that hoisting the load into the preheader is
- // safe (i.e. proving dereferenceability on all paths through the loop). We
+ // guaranteed to execute, we can still establish these properties.
+ // We can establish (p1) by proving that hoisting the load into the preheader
+ // is safe (i.e. proving dereferenceability on all paths through the loop). We
// can use any access within the alias set to prove dereferenceability,
// since they're all must alias.
- // (p2) by proving the memory is thread-local, so the memory model
+ //
+ // There are two ways establish (p2):
+ // a) Prove the location is thread-local. In this case the memory model
// requirement does not apply, and stores are safe to insert.
+ // b) Prove a store dominates every exit block. In this case, if an exit
+ // blocks is reached, the original dynamic path would have taken us through
+ // the store, so inserting a store into the exit block is safe. Note that this
+ // is different from the store being guaranteed to execute. For instance,
+ // if an exception is thrown on the first iteration of the loop, the original
+ // store is never executed, but the exit blocks are not executed either.
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
@@ -1001,6 +1010,17 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
+ // If a store dominates all exit blocks, it is safe to sink.
+ // As explained above, if an exit block was executed, a dominating
+ // store must have been been executed at least once, so we are not
+ // introducing stores on paths that did not have them.
+ // Note that this only looks at explicit exit blocks. If we ever
+ // start sinking stores into unwind edges (see above), this will break.
+ if (!SafeToInsertStore)
+ SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+ return DT->dominates(Store->getParent(), Exit);
+ });
+
// If the store is not guaranteed to execute, we may still get
// deref info through it.
if (!DereferenceableInPH) {
diff --git a/llvm/test/Transforms/LICM/scalar_promote.ll b/llvm/test/Transforms/LICM/scalar_promote.ll
index 91cdbdbc226..dc5151be8a8 100644
--- a/llvm/test/Transforms/LICM/scalar_promote.ll
+++ b/llvm/test/Transforms/LICM/scalar_promote.ll
@@ -186,6 +186,198 @@ for.end: ; preds = %for.cond.for.end_cr
; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
}
+declare i32 @opaque(i32) argmemonly
+declare void @capture(i32*)
+
+; We can promote even if opaque may throw.
+define i32 @test7() {
+; CHECK-LABEL: @test7(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+ %local = alloca i32
+ call void @capture(i32* %local)
+ br label %loop
+
+loop:
+ %j = phi i32 [ 0, %entry ], [ %next, %loop ]
+ %x = load i32, i32* %local
+ %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
+ store i32 %x2, i32* %local
+ %next = add i32 %j, 1
+ %cond = icmp eq i32 %next, 0
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %ret = load i32, i32* %local
+ ret i32 %ret
+}
+
+; Make sure we don't promote if the store is really control-flow dependent.
+define i32 @test7bad() {
+; CHECK-LABEL: @test7bad(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: br label %loop
+; CHECK: if:
+; CHECK-NEXT: store i32 %x2, i32* %local
+; CHECK-NEXT: br label %else
+; CHECK: exit:
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+ %local = alloca i32
+ call void @capture(i32* %local)
+ br label %loop
+loop:
+ %j = phi i32 [ 0, %entry ], [ %next, %else ]
+ %x = load i32, i32* %local
+ %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
+ %cmp = icmp eq i32 %x2, 0
+ br i1 %cmp, label %if, label %else
+
+if:
+ store i32 %x2, i32* %local
+ br label %else
+
+else:
+ %next = add i32 %j, 1
+ %cond = icmp eq i32 %next, 0
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %ret = load i32, i32* %local
+ ret i32 %ret
+}
+
+; Even if neither the load nor the store or guaranteed to execute because
+; opaque() may throw, we can still promote - the load not being guaranteed
+; doesn't block us, because %local is always dereferenceable.
+define i32 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+ %local = alloca i32
+ call void @capture(i32* %local)
+ br label %loop
+
+loop:
+ %j = phi i32 [ 0, %entry ], [ %next, %loop ]
+ %throwaway = call i32 @opaque(i32 %j)
+ %x = load i32, i32* %local
+ %x2 = call i32 @opaque(i32 %x)
+ store i32 %x2, i32* %local
+ %next = add i32 %j, 1
+ %cond = icmp eq i32 %next, 0
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %ret = load i32, i32* %local
+ ret i32 %ret
+}
+
+
+; If the store is "guaranteed modulo exceptions", and the load depends on
+; control flow, we can only promote if the pointer is otherwise known to be
+; dereferenceable
+define i32 @test9() {
+; CHECK-LABEL: @test9(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %else ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+ %local = alloca i32
+ call void @capture(i32* %local)
+ br label %loop
+
+loop:
+ %j = phi i32 [ 0, %entry ], [ %next, %else ]
+ %j2 = call i32 @opaque(i32 %j)
+ %cmp = icmp eq i32 %j2, 0
+ br i1 %cmp, label %if, label %else
+
+if:
+ %x = load i32, i32* %local
+ br label %else
+
+else:
+ %x2 = phi i32 [ 0, %loop ], [ %x, %if]
+ store i32 %x2, i32* %local
+ %next = add i32 %j, 1
+ %cond = icmp eq i32 %next, 0
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %ret = load i32, i32* %local
+ ret i32 %ret
+}
+
+define i32 @test9bad(i32 %i) {
+; CHECK-LABEL: @test9bad(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: %notderef = getelementptr
+; CHECK-NEXT: br label %loop
+; CHECK: if:
+; CHECK-NEXT: load i32, i32* %notderef
+; CHECK-NEXT: br label %else
+; CHECK: exit:
+; CHECK-NEXT: %ret = load i32, i32* %notderef
+; CHECK-NEXT: ret i32 %ret
+entry:
+ %local = alloca i32
+ call void @capture(i32* %local)
+ %notderef = getelementptr i32, i32* %local, i32 %i
+ br label %loop
+
+loop:
+ %j = phi i32 [ 0, %entry ], [ %next, %else ]
+ %j2 = call i32 @opaque(i32 %j)
+ %cmp = icmp eq i32 %j2, 0
+ br i1 %cmp, label %if, label %else
+
+if:
+ %x = load i32, i32* %notderef
+ br label %else
+
+else:
+ %x2 = phi i32 [ 0, %loop ], [ %x, %if]
+ store i32 %x2, i32* %notderef
+ %next = add i32 %j, 1
+ %cond = icmp eq i32 %next, 0
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %ret = load i32, i32* %notderef
+ ret i32 %ret
+}
+
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
OpenPOWER on IntegriCloud