summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-10-19 08:17:50 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-10-19 08:17:50 +0000
commita801dd5799bd6f6641ce3a7ea48f32d274ef95a0 (patch)
tree560a472c66fbfae4ebb35d7040168d6787b365a2 /llvm/test/Transforms
parent0c28bc20da65169dac6133be5e230b7c5c0914c1 (diff)
downloadbcm5719-llvm-a801dd5799bd6f6641ce3a7ea48f32d274ef95a0.tar.gz
bcm5719-llvm-a801dd5799bd6f6641ce3a7ea48f32d274ef95a0.zip
Fix a long-standing miscompile in the load analysis that was uncovered
by my refactoring of this code. The method isSafeToLoadUnconditionally assumes that the load will proceed with the preferred type alignment. Given that, it has to ensure that the alloca or global is at least that aligned. It has always done this historically when a datalayout is present, but has never checked it when the datalayout is absent. When I refactored the code in r220156, I exposed this path when datalayout was present and that turned the latent bug into a patent bug. This fixes the issue by just removing the special case which allows folding things without datalayout. This isn't worth the complexity of trying to tease apart when it is or isn't safe without actually knowing the preferred alignment. llvm-svn: 220161
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r--llvm/test/Transforms/InstCombine/load.ll1
-rw-r--r--llvm/test/Transforms/InstCombine/select.ll38
-rw-r--r--llvm/test/Transforms/TailCallElim/reorder_load.ll27
3 files changed, 62 insertions, 4 deletions
diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll
index c8ce70a5c03..20d40e2ccfd 100644
--- a/llvm/test/Transforms/InstCombine/load.ll
+++ b/llvm/test/Transforms/InstCombine/load.ll
@@ -2,6 +2,7 @@
; This test makes sure that these instructions are properly eliminated.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@X = constant i32 42 ; <i32*> [#uses=2]
@X2 = constant i32 47 ; <i32*> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index d625f3b1b33..d1d8b888713 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -1236,3 +1236,41 @@ define i32 @test75(i32 %x) {
; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
; CHECK-NEXT: ret i32 [[SEL]]
}
+
+@under_aligned = external global i32, align 1
+
+define i32 @test76(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test76(
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x
+; CHECK: load i32* %[[P]]
+
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* @under_aligned, i32* %x
+ %v = load i32* %p
+ ret i32 %v
+}
+
+declare void @scribble_on_memory(i32*)
+
+define i32 @test77(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test77(
+; CHECK: %[[A:.*]] = alloca i32, align 1
+; CHECK: call void @scribble_on_memory(i32* %[[A]])
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
+; CHECK: load i32* %[[P]]
+
+ %under_aligned = alloca i32, align 1
+ call void @scribble_on_memory(i32* %under_aligned)
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* %under_aligned, i32* %x
+ %v = load i32* %p
+ ret i32 %v
+}
diff --git a/llvm/test/Transforms/TailCallElim/reorder_load.ll b/llvm/test/Transforms/TailCallElim/reorder_load.ll
index 53c65dab101..2e350d662a3 100644
--- a/llvm/test/Transforms/TailCallElim/reorder_load.ll
+++ b/llvm/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,6 +1,8 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s
; PR4323
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
; Several cases where tail call elimination should move the load above the call,
; then eliminate the tail recursion.
@@ -12,6 +14,11 @@
; This load can be moved above the call because the function won't write to it
; and the call has no side effects.
define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_1(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -21,7 +28,6 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -32,6 +38,11 @@ else: ; preds = %entry
; This load can be moved above the call because the function won't write to it
; and the load provably can't trap.
define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_2(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -48,7 +59,6 @@ unwind: ; preds = %else
recurse: ; preds = %else
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* @global ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -59,6 +69,11 @@ recurse: ; preds = %else
; This load can be safely moved above the call (even though it's from an
; extern_weak global) because the call has no side effects.
define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_3(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -68,7 +83,6 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* @extern_weak_global ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -80,6 +94,12 @@ else: ; preds = %entry
; unknown pointer (which normally means it might trap) because the first load
; proves it doesn't trap.
define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_4(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NEXT: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -97,7 +117,6 @@ unwind: ; preds = %else
recurse: ; preds = %else
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
%first = load i32* %a_arg ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
%second = load i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
OpenPOWER on IntegriCloud