-rw-r--r--  llvm/include/llvm/Transforms/Utils/LoopUtils.h  |   4
-rw-r--r--  llvm/lib/Transforms/Scalar/LICM.cpp             |  67
-rw-r--r--  llvm/test/Transforms/LICM/promote-tls.ll        | 133
3 files changed, 191 insertions, 13 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 222fe5e89d1..9705e5c6872 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -373,8 +373,8 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock*> &,
SmallVectorImpl<Instruction*> &,
PredIteratorCache &, LoopInfo *,
- DominatorTree *, Loop *, AliasSetTracker *,
- LICMSafetyInfo *);
+ DominatorTree *, const TargetLibraryInfo *,
+ Loop *, AliasSetTracker *, LICMSafetyInfo *);
/// \brief Computes safety information for a loop
/// checks loop body & header for the possibility of may throw
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 0ac53ba92cb..daf76218e62 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -35,10 +35,12 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -223,7 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Loop over all of the alias sets in the tracker object.
for (AliasSet &AS : *CurAST)
Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
- PIC, LI, DT, CurLoop,
+ PIC, LI, DT, TLI, CurLoop,
CurAST, &SafetyInfo);
// Once we have promoted values across the loop body we have to recursively
@@ -846,7 +848,9 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
SmallVectorImpl<BasicBlock*>&ExitBlocks,
SmallVectorImpl<Instruction*>&InsertPts,
PredIteratorCache &PIC, LoopInfo *LI,
- DominatorTree *DT, Loop *CurLoop,
+ DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ Loop *CurLoop,
AliasSetTracker *CurAST,
LICMSafetyInfo * SafetyInfo) {
// Verify inputs.
@@ -879,10 +883,25 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
//
// is not safe, because *P may only be valid to access if 'c' is true.
//
+ // The safety property divides into two parts:
+ // 1) The memory may not be dereferenceable on entry to the loop. In this
+ // case, we can't insert the required load in the preheader.
+ // 2) The memory model does not allow us to insert a store along any dynamic
+ // path which did not originally have one.
+ //
// It is safe to promote P if all uses are direct load/stores and if at
// least one is guaranteed to be executed.
bool GuaranteedToExecute = false;
+ // It is also safe to promote P if we can prove that speculating a load into
+ // the preheader is safe (i.e. proving dereferenceability on all
+ // paths through the loop), and that the memory can be proven thread-local
+ // (so that the memory model requirement doesn't apply). We first establish
+ // the former, and then run a capture analysis below to establish the latter.
+ // We can use any access within the alias set to prove dereferenceability,
+ // since they're all must-alias.
+ bool CanSpeculateLoad = false;
+
SmallVector<Instruction*, 64> LoopUses;
SmallPtrSet<Value*, 4> PointerMustAliases;
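To make the memory-model half of that comment concrete, here is a minimal
C++ sketch (the function and variable names are ours, not part of the patch)
of the pattern the new rule must reject unless the memory is thread-local:

    // Illustration only: the sole store to *P is conditional, so it is not
    // guaranteed to execute on any given trip through the loop.
    void accumulate(int *P, int N, bool Cond) {
      for (int I = 0; I < N; ++I)
        if (Cond)
          *P += 1;
      // If LICM promoted *P to a register and wrote it back unconditionally
      // after the loop, a run with Cond == false would store to *P where the
      // original program never did; a concurrent reader of *P could observe
      // that store. This is only legal when *P is provably thread-local.
    }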
@@ -892,6 +911,16 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
AAMDNodes AATags;
bool HasDedicatedExits = CurLoop->hasDedicatedExits();
+ // Don't sink stores from loops without dedicated block exits. Exits
+ // containing indirect branches are not transformed by loop simplify,
+ // make sure we catch that. An additional load may be generated in the
+ // preheader for SSA updater, so also avoid sinking when no preheader
+ // is available.
+ if (!HasDedicatedExits || !Preheader)
+ return false;
+
+ const DataLayout &MDL = Preheader->getModule()->getDataLayout();
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes. While we are at it, collect alignment and AA info.
@@ -918,6 +947,12 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
assert(!Load->isVolatile() && "AST broken");
if (!Load->isSimple())
return Changed;
+
+ if (!GuaranteedToExecute && !CanSpeculateLoad)
+ CanSpeculateLoad =
+ isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
+ SafetyInfo,
+ Preheader->getTerminator());
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
@@ -926,13 +961,6 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
assert(!Store->isVolatile() && "AST broken");
if (!Store->isSimple())
return Changed;
- // Don't sink stores from loops without dedicated block exits. Exits
- // containing indirect branches are not transformed by loop simplify,
- // make sure we catch that. An additional load may be generated in the
- // preheader for SSA updater, so also avoid sinking when no preheader
- // is available.
- if (!HasDedicatedExits || !Preheader)
- return Changed;
// Note that we only check GuaranteedToExecute inside the store case
// so that we do not introduce stores where they did not exist before
@@ -953,6 +981,14 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
GuaranteedToExecute = isGuaranteedToExecute(*UI, DT,
CurLoop, SafetyInfo);
+
+ if (!GuaranteedToExecute && !CanSpeculateLoad) {
+ CanSpeculateLoad =
+ isDereferenceableAndAlignedPointer(Store->getPointerOperand(),
+ Store->getAlignment(), MDL,
+ Preheader->getTerminator(),
+ DT, TLI);
+ }
} else
return Changed; // Not a load or store.
@@ -968,8 +1004,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
}
}
- // If there isn't a guaranteed-to-execute instruction, we can't promote.
- if (!GuaranteedToExecute)
+ // Check legality per comment above. Otherwise, we can't promote.
+ bool PromotionIsLegal = GuaranteedToExecute;
+ if (!PromotionIsLegal && CanSpeculateLoad) {
+ // If this is a thread local location, then we can insert stores along
+ // paths which originally didn't have them without violating the memory
+ // model.
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ PromotionIsLegal = isAllocLikeFn(Object, TLI) &&
+ !PointerMayBeCaptured(Object, true, true);
+ }
+ if (!PromotionIsLegal)
return Changed;
// Figure out the loop exits and their insertion points, if this is the
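Taken together, the new legality check reduces to the predicate below. This
is a hedged restatement, not code from the patch: the standalone wrapper
function is hypothetical, but the three helper calls (GetUnderlyingObject,
isAllocLikeFn, PointerMayBeCaptured) mirror the lines added above verbatim:

    // Assumes the headers the patch already includes (CaptureTracking.h,
    // MemoryBuiltins.h) and this revision's C++ API.
    static bool isProvablyThreadLocal(Value *SomePtr, const DataLayout &DL,
                                      const TargetLibraryInfo *TLI) {
      Value *Object = GetUnderlyingObject(SomePtr, DL);
      // Alloc-like (e.g. the result of malloc) and never captured: no other
      // thread can hold a pointer to the object, so the extra stores are
      // unobservable and the memory-model restriction does not apply.
      return isAllocLikeFn(Object, TLI) &&
             !PointerMayBeCaptured(Object, /*ReturnCaptures=*/true,
                                   /*StoreCaptures=*/true);
    }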
diff --git a/llvm/test/Transforms/LICM/promote-tls.ll b/llvm/test/Transforms/LICM/promote-tls.ll
new file mode 100644
index 00000000000..7763114ef45
--- /dev/null
+++ b/llvm/test/Transforms/LICM/promote-tls.ll
@@ -0,0 +1,133 @@
+; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s
+
+; If we can prove a local is thread local, we can insert stores during
+; promotion which wouldn't be legal otherwise.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-linux-generic"
+
+@p = external global i8*
+
+declare i8* @malloc(i64)
+
+; Exercise the TLS case
+define i32* @test(i32 %n) {
+entry:
+ ;; ignore the required null check for simplicity
+ %mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
+ %addr = bitcast i8* %mem to i32*
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.header
+
+for.header:
+ %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %old = load i32, i32* %addr, align 4
+ ; deliberate impossible to analyze branch
+ %guard = load volatile i8*, i8** @p
+ %exitcmp = icmp eq i8* %guard, null
+ br i1 %exitcmp, label %for.body, label %early-exit
+
+early-exit:
+; CHECK-LABEL: early-exit:
+; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
+ ret i32* null
+
+for.body:
+ %new = add i32 %old, 1
+ store i32 %new, i32* %addr, align 4
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+; CHECK-LABEL: for.cond.for.end_crit_edge:
+; CHECK: store i32 %new.lcssa, i32* %addr, align 1
+ %split = phi i32* [ %addr, %for.body ]
+ ret i32* null
+}
+
+declare i8* @not_malloc(i64)
+
+; Negative test - not TLS
+define i32* @test_neg(i32 %n) {
+entry:
+ ;; ignore the required null check for simplicity
+ %mem = call dereferenceable(16) noalias i8* @not_malloc(i64 16)
+ %addr = bitcast i8* %mem to i32*
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.header
+
+for.header:
+ %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %old = load i32, i32* %addr, align 4
+ ; deliberate impossible to analyze branch
+ %guard = load volatile i8*, i8** @p
+ %exitcmp = icmp eq i8* %guard, null
+ br i1 %exitcmp, label %for.body, label %early-exit
+
+early-exit:
+; CHECK-LABEL: early-exit:
+; CHECK-NOT: store
+ ret i32* null
+
+for.body:
+; CHECK-LABEL: for.body:
+; CHECK: store i32 %new, i32* %addr, align 4
+ %new = add i32 %old, 1
+ store i32 %new, i32* %addr, align 4
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+; CHECK-LABEL: for.cond.for.end_crit_edge:
+; CHECK-NOT: store
+ %split = phi i32* [ %addr, %for.body ]
+ ret i32* null
+}
+
+; Negative test - can't speculate load since branch
+; may control alignment
+define i32* @test_neg2(i32 %n) {
+entry:
+ ;; ignore the required null check for simplicity
+ %mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
+ %addr = bitcast i8* %mem to i32*
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.header
+
+for.header:
+ %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ ; deliberate impossible to analyze branch
+ %guard = load volatile i8*, i8** @p
+ %exitcmp = icmp eq i8* %guard, null
+ br i1 %exitcmp, label %for.body, label %early-exit
+
+early-exit:
+; CHECK-LABEL: early-exit:
+; CHECK-NOT: store
+ ret i32* null
+
+for.body:
+; CHECK-LABEL: for.body:
+; CHECK: store i32 %new, i32* %addr, align 4
+ %old = load i32, i32* %addr, align 4
+ %new = add i32 %old, 1
+ store i32 %new, i32* %addr, align 4
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+; CHECK-LABEL: for.cond.for.end_crit_edge:
+; CHECK-NOT: store
+ %split = phi i32* [ %addr, %for.body ]
+ ret i32* null
+}
+
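For orientation, the first test above (@test) has roughly the following C++
shape. This is our reconstruction, not source from the patch; opaqueGuard()
is a hypothetical stand-in for the deliberately unanalyzable volatile load
of @p:

    #include <cstdlib>

    extern bool opaqueGuard();  // hypothetical: models the volatile load of @p

    int *testShape(int N) {
      int *Addr = (int *)malloc(16);  // never escapes before the loop
      for (int I = 0;;) {
        int Old = *Addr;              // speculatable: dereferenceable(16)
        if (opaqueGuard())
          return nullptr;             // after promotion, a write-back lands here
        *Addr = Old + 1;
        if (++I >= N)
          break;                      // ...and on this exit edge as well
      }
      return nullptr;
    }

Because the block is alloc-like and uncaptured, promotion may materialize
stores on both exits; @test_neg fails the isAllocLikeFn check, and @test_neg2
fails the speculation check since no load precedes the opaque branch.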