summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorMax Kazantsev <max.kazantsev@azul.com>2017-10-31 05:07:56 +0000
committerMax Kazantsev <max.kazantsev@azul.com>2017-10-31 05:07:56 +0000
commit488ec975bb418cc3fd230a9acdfdb1effc61670e (patch)
tree19232c65efdc198fc14d64e2f2728f5826a2bf10 /llvm/test
parent39a8dbff8760adfa55942c61d6e75dd432d30875 (diff)
downloadbcm5719-llvm-488ec975bb418cc3fd230a9acdfdb1effc61670e.tar.gz
bcm5719-llvm-488ec975bb418cc3fd230a9acdfdb1effc61670e.zip
Reapply "[GVN] Prevent LoadPRE from hoisting across instructions that don't pass control flow to successors"
This patch fixes the miscompile that happens when PRE hoists loads across guards and other instructions that don't always pass control flow to their successors. PRE is now prohibited from hoisting across such instructions because there is no guarantee that a load placed after such an instruction is still valid before it. For example, a load from under a guard may be invalid before the guard in the following case: int array[LEN]; ... guard(0 <= index && index < LEN); use(array[index]); Differential Revision: https://reviews.llvm.org/D37460 llvm-svn: 316975
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll7
-rw-r--r--llvm/test/Transforms/GVN/PRE/pre-load-guards.ll146
-rw-r--r--llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll118
-rw-r--r--llvm/test/Transforms/GVN/PRE/pre-load.ll158
4 files changed, 429 insertions, 0 deletions
diff --git a/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll b/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
index 5fbb0fcc511..14f65a4a24a 100644
--- a/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
+++ b/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
@@ -17,6 +17,13 @@ declare i64* @getaddr_i64(i64 addrspace(100)*) #0
define hidden void @wrapon_fn173() {
; CHECK-LABEL: @wrapon_fn173
+; CHECK: entry:
+; CHECK-NEXT: call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
+; CHECK-NEXT: %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: call i64* @getaddr_i64(i64 addrspace(100)* %.pre)
+; CHECK-NEXT: br label %loop
entry:
%0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-guards.ll b/llvm/test/Transforms/GVN/PRE/pre-load-guards.ll
new file mode 100644
index 00000000000..eeb3ef649d9
--- /dev/null
+++ b/llvm/test/Transforms/GVN/PRE/pre-load-guards.ll
@@ -0,0 +1,146 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; This is a motivating example on why we prohibit hoisting through guards.
+; In the bottom block, we check that the index is within bounds and only access
+; the element in this case and deoptimize otherwise. If we hoist the load to a
+; place above the guard, it may lead to an out-of-bounds array access.
+define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
+; CHECK-LABEL: @test_motivation(
+block1:
+ %el1 = getelementptr inbounds i32, i32* %q, i32 %index
+ %el2 = getelementptr inbounds i32, i32* %p, i32 %index
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+; CHECK-NOT: sge
+; CHECK-NOT: slt
+; CHECK-NOT: and
+ br label %block4
+
+block3:
+ store i32 0, i32* %el1
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: %cond1 = icmp sge i32 %index, 0
+; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len
+; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
+; CHECK-NEXT: %PRE = load i32, i32* %P2
+; CHECK: ret i32 %PRE
+
+ %P2 = phi i32* [%el2, %block3], [%el1, %block2]
+ %cond1 = icmp sge i32 %index, 0
+ %cond2 = icmp slt i32 %index, %len
+ %in.bounds = and i1 %cond1, %cond2
+ call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
+ %PRE = load i32, i32* %P2
+ ret i32 %PRE
+}
+
+; Guard in load's block that is above the load should prohibit the PRE.
+define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_01(
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+ br label %block4
+
+block3:
+ store i32 0, i32* %p
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK: ret i32
+
+ %P2 = phi i32* [%p, %block3], [%q, %block2]
+ call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+ %PRE = load i32, i32* %P2
+ ret i32 %PRE
+}
+
+; Guard in load's block that is below the load should not prohibit the PRE.
+define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_02(
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: load i32, i32* %q
+
+ br label %block4
+
+block3:
+ store i32 0, i32* %p
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32 [
+; CHECK-NEXT: phi i32* [
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NOT: load
+; CHECK: ret i32
+
+ %P2 = phi i32* [%p, %block3], [%q, %block2]
+ %PRE = load i32, i32* %P2
+ call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+ ret i32 %PRE
+}
+
+; Guard above the load's block should prevent PRE from hoisting through it.
+define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_03(
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+ br label %block4
+
+block3:
+ store i32 0, i32* %p
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32*
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK-NEXT: ret i32
+
+ %P2 = phi i32* [%p, %block3], [%q, %block2]
+ call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+ br label %block5
+
+block5:
+ %PRE = load i32, i32* %P2
+ ret i32 %PRE
+}
diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll b/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
new file mode 100644
index 00000000000..c131e923997
--- /dev/null
+++ b/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
@@ -0,0 +1,118 @@
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; These tests exercise situations when instructions that were first instructions
+; with implicit control flow get removed. We make sure that after that we don't
+; face crashes and are still able to perform PRE correctly.
+
+declare i32 @foo(i32 %arg) #0
+
+define hidden void @test_01(i32 %x, i32 %y) {
+
+; c2 only throws if c1 throws, so it can be safely removed and then PRE can
+; hoist the load out of loop.
+
+; CHECK-LABEL: @test_01
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %x)
+ %val = load i32, i32* null, align 8
+ %c3 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+define hidden void @test_02(i32 %x, i32 %y) {
+
+; PRE is not allowed because c2 may throw.
+
+; CHECK-LABEL: @test_02
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c2 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %val = load i32, i32* null, align 8
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %val)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %y)
+ %val = load i32, i32* null, align 8
+ %c3 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+define hidden void @test_03(i32 %x, i32 %y) {
+
+; PRE of load is allowed because c2 only throws if c1 throws. c3 should
+; not be eliminated. c4 is eliminated because it only throws if c3 throws.
+
+; CHECK-LABEL: @test_03
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %c5 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %x)
+ %val = load i32, i32* null, align 8
+ %c3 = call i32 @foo(i32 %y)
+ %val2 = load i32, i32* null, align 8
+ %c4 = call i32 @foo(i32 %y)
+ %c5 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+define hidden void @test_04(i32 %x, i32 %y) {
+
+; PRE is not allowed even after we remove c2 because now c3 prevents us from it.
+
+; CHECK-LABEL: @test_04
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %val = load i32, i32* null, align 8
+; CHECK-NEXT: %c5 = call i32 @foo(i32 %val)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %x)
+ %c3 = call i32 @foo(i32 %y)
+ %val = load i32, i32* null, align 8
+ %c4 = call i32 @foo(i32 %y)
+ %c5 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+attributes #0 = { readnone }
diff --git a/llvm/test/Transforms/GVN/PRE/pre-load.ll b/llvm/test/Transforms/GVN/PRE/pre-load.ll
index ffff2b7f08e..1d050f4b620 100644
--- a/llvm/test/Transforms/GVN/PRE/pre-load.ll
+++ b/llvm/test/Transforms/GVN/PRE/pre-load.ll
@@ -430,3 +430,161 @@ cleanup2:
call void @g(i32 %NOTPRE)
cleanupret from %c2 unwind to caller
}
+
+; Don't PRE load across potentially throwing calls.
+
+define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test13(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+ call void @f()
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+}
+
+; Same as test13, but now the blocking function is not immediately in load's
+; block.
+
+define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test14(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+ call void @f()
+ br label %follow_1
+
+follow_1:
+ br label %follow_2
+
+follow_2:
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+}
+
+; Same as test13, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test15
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+ call void @f()
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+}
+
+; Same as test14, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test16(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+ call void @f()
+ br label %follow_1
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+follow_1:
+ br label %follow_2
+
+follow_2:
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+}
OpenPOWER on IntegriCloud