summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Analysis/Loads.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp6
-rw-r--r--llvm/test/Transforms/InstCombine/load.ll1
-rw-r--r--llvm/test/Transforms/InstCombine/select.ll38
-rw-r--r--llvm/test/Transforms/TailCallElim/reorder_load.ll27
5 files changed, 67 insertions, 16 deletions
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index f5eb7055726..4838d856ae3 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -73,11 +73,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
Type *BaseType = nullptr;
unsigned BaseAlign = 0;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
- // Loading directly from an alloca is trivially safe. We can't even look
- // through pointer casts here though, as that might change the size loaded.
- if (AI == V)
- return true;
-
// An alloca is safe to load from as load as it is suitably aligned.
BaseType = AI->getAllocatedType();
BaseAlign = AI->getAlignment();
@@ -86,12 +81,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// overridden. Their size may change or they may be weak and require a test
// to determine if they were in fact provided.
if (!GV->mayBeOverridden()) {
- // Loading directly from the non-overridden global is trivially safe. We
- // can't even look through pointer casts here though, as that might change
- // the size loaded.
- if (GV == V)
- return true;
-
BaseType = GV->getType()->getElementType();
BaseAlign = GV->getAlignment();
}
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index b7580255150..6fe5e188b1a 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -63,6 +63,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -86,6 +87,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
const TargetTransformInfo *TTI;
+ const DataLayout *DL;
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(ID) {
@@ -157,6 +159,8 @@ bool TailCallElim::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ DL = F.getParent()->getDataLayout();
+
bool AllCallsAreTailCalls = false;
bool Modified = markTails(F, AllCallsAreTailCalls);
if (AllCallsAreTailCalls)
@@ -450,7 +454,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// being loaded from.
if (CI->mayWriteToMemory() ||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L,
- L->getAlignment()))
+ L->getAlignment(), DL))
return false;
}
}
diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll
index c8ce70a5c03..20d40e2ccfd 100644
--- a/llvm/test/Transforms/InstCombine/load.ll
+++ b/llvm/test/Transforms/InstCombine/load.ll
@@ -2,6 +2,7 @@
; This test makes sure that these instructions are properly eliminated.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@X = constant i32 42 ; <i32*> [#uses=2]
@X2 = constant i32 47 ; <i32*> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index d625f3b1b33..d1d8b888713 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -1236,3 +1236,41 @@ define i32 @test75(i32 %x) {
; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
; CHECK-NEXT: ret i32 [[SEL]]
}
+
+@under_aligned = external global i32, align 1
+
+define i32 @test76(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test76(
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x
+; CHECK: load i32* %[[P]]
+
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* @under_aligned, i32* %x
+ %v = load i32* %p
+ ret i32 %v
+}
+
+declare void @scribble_on_memory(i32*)
+
+define i32 @test77(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test77(
+; CHECK: %[[A:.*]] = alloca i32, align 1
+; CHECK: call void @scribble_on_memory(i32* %[[A]])
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
+; CHECK: load i32* %[[P]]
+
+ %under_aligned = alloca i32, align 1
+ call void @scribble_on_memory(i32* %under_aligned)
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* %under_aligned, i32* %x
+ %v = load i32* %p
+ ret i32 %v
+}
diff --git a/llvm/test/Transforms/TailCallElim/reorder_load.ll b/llvm/test/Transforms/TailCallElim/reorder_load.ll
index 53c65dab101..2e350d662a3 100644
--- a/llvm/test/Transforms/TailCallElim/reorder_load.ll
+++ b/llvm/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,6 +1,8 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s
; PR4323
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
; Several cases where tail call elimination should move the load above the call,
; then eliminate the tail recursion.
@@ -12,6 +14,11 @@
; This load can be moved above the call because the function won't write to it
; and the call has no side effects.
define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_1(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -21,7 +28,6 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -32,6 +38,11 @@ else: ; preds = %entry
; This load can be moved above the call because the function won't write to it
; and the load provably can't trap.
define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_2(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -48,7 +59,6 @@ unwind: ; preds = %else
recurse: ; preds = %else
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* @global ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -59,6 +69,11 @@ recurse: ; preds = %else
; This load can be safely moved above the call (even though it's from an
; extern_weak global) because the call has no side effects.
define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_3(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -68,7 +83,6 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* @extern_weak_global ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -80,6 +94,12 @@ else: ; preds = %entry
; unknown pointer (which normally means it might trap) because the first load
; proves it doesn't trap.
define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_4(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NEXT: load i32*
+; CHECK-NOT: call
+; CHECK: }
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
br i1 %tmp2, label %if, label %else
@@ -97,7 +117,6 @@ unwind: ; preds = %else
recurse: ; preds = %else
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
%first = load i32* %a_arg ; <i32> [#uses=1]
-; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
%second = load i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
OpenPOWER on IntegriCloud