summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp83
-rw-r--r--llvm/test/Transforms/LICM/argmemonly-call.ll31
-rw-r--r--llvm/test/Transforms/LICM/invariant.start.ll42
-rw-r--r--llvm/test/Transforms/LICM/read-only-calls.ll85
4 files changed, 219 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index f576661363d..ee187c1d601 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -89,6 +89,13 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
+// Default value of zero implies we use the regular alias set tracker mechanism
+// instead of the cross product using AA to identify aliasing of the memory
+// location we are interested in.
+static cl::opt<int>
+LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
+ cl::desc("How many instruction to cross product using AA"));
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
@@ -105,8 +112,10 @@ static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE,
const Instruction *CtxI = nullptr);
-static bool isInvalidatedByLoop(const MemoryLocation &MemLoc,
- AliasSetTracker *CurAST);
+static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
+ AliasSetTracker *CurAST, Loop *CurLoop,
+ AliasAnalysis *AA);
+
static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,
@@ -628,7 +637,16 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true;
- bool Invalidated = isInvalidatedByLoop(MemoryLocation::get(LI), CurAST);
+ // Don't hoist loads which have may-aliased stores in loop.
+ uint64_t Size = 0;
+ if (LI->getType()->isSized())
+ Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
+ bool Invalidated = pointerInvalidatedByLoop(
+ MemoryLocation(LI->getOperand(0), Size, AAInfo), CurAST, CurLoop, AA);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -669,10 +687,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
for (Value *Op : CI->arg_operands())
if (Op->getType()->isPointerTy() &&
- isInvalidatedByLoop(MemoryLocation(Op,
- MemoryLocation::UnknownSize,
- AAMDNodes()),
- CurAST))
+ pointerInvalidatedByLoop(
+ MemoryLocation(Op, MemoryLocation::UnknownSize, AAMDNodes()),
+ CurAST, CurLoop, AA))
return false;
return true;
}
@@ -1569,13 +1586,51 @@ void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
LICM.getLoopToAliasSetMap().erase(L);
}
-/// Return true if the body of this loop may store into the memory
-/// location pointed to by V.
-///
-static bool isInvalidatedByLoop(const MemoryLocation &MemLoc,
- AliasSetTracker *CurAST) {
- // Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetFor(MemLoc).isMod();
+static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
+ AliasSetTracker *CurAST, Loop *CurLoop,
+ AliasAnalysis *AA) {
+ // First check to see if any of the basic blocks in CurLoop modify MemLoc.
+ bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
+
+ if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
+ return isInvalidatedAccordingToAST;
+
+ // Check with a diagnostic analysis if we can refine the information above.
+ // This is to identify the limitations of using the AST.
+ // The alias set mechanism used by LICM has a major weakness in that it
+ // combines all things which may alias into a single set *before* asking
+ // modref questions. As a result, a single readonly call within a loop will
+ // collapse all loads and stores into a single alias set and report
+ // invalidation if the loop contains any store. For example, readonly calls
+ // with deopt states have this form and create a general alias set with all
+ // loads and stores. In order to get any LICM in loops containing possible
+ // deopt states we need a more precise invalidation check that queries the
+ // mod ref info of each instruction within the loop against MemLoc. This has
+ // a complexity of O(N^2), so currently, it is used only as a diagnostic tool
+ // since the default value of -licm-n2-threshold is zero.
+
+ // Don't look at nested loops.
+ if (CurLoop->begin() != CurLoop->end())
+ return true;
+
+ int N = 0;
+ for (BasicBlock *BB : CurLoop->getBlocks())
+ for (Instruction &I : *BB) {
+ if (N >= LICMN2Theshold) {
+ LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
+ << *(MemLoc.Ptr) << "\n");
+ return true;
+ }
+ N++;
+ auto Res = AA->getModRefInfo(&I, MemLoc);
+ if (isModSet(Res)) {
+ LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
+ << *(MemLoc.Ptr) << "\n");
+ return true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
+ return false;
}
/// Little predicate that returns true if the specified basic block is in
diff --git a/llvm/test/Transforms/LICM/argmemonly-call.ll b/llvm/test/Transforms/LICM/argmemonly-call.ll
index fe7c6af6d6d..b7d7d6a3953 100644
--- a/llvm/test/Transforms/LICM/argmemonly-call.ll
+++ b/llvm/test/Transforms/LICM/argmemonly-call.ll
@@ -1,5 +1,8 @@
-; RUN: opt -S -basicaa -licm %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -S -basicaa -licm -licm-n2-threshold=0 %s | FileCheck %s
+; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2
+; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2
+
declare i32 @foo() readonly argmemonly nounwind
declare i32 @foo2() readonly nounwind
declare i32 @bar(i32* %loc2) readonly argmemonly nounwind
@@ -68,3 +71,27 @@ loop:
store i32 %res, i32* %loc
br label %loop
}
+
+declare i32 @foo_new(i32*) readonly
+; With the default AST mechanism used by LICM for alias analysis,
+; we clump foo_new with bar.
+; With the N2 Alias analysis diagnostic tool, we are able to hoist the
+; argmemonly bar call out of the loop.
+
+define void @test5(i32* %loc2, i32* noalias %loc) {
+; ALIAS-N2-LABEL: @test5
+; ALIAS-N2: @bar
+; ALIAS-N2-LABEL: loop:
+
+; CHECK-LABEL: @test5
+; CHECK-LABEL: loop:
+; CHECK: @bar
+ br label %loop
+
+loop:
+ %res1 = call i32 @bar(i32* %loc2)
+ %res = call i32 @foo_new(i32* %loc2)
+ store volatile i32 %res1, i32* %loc
+ br label %loop
+}
+
diff --git a/llvm/test/Transforms/LICM/invariant.start.ll b/llvm/test/Transforms/LICM/invariant.start.ll
index ff17bd096ce..3ac6793d185 100644
--- a/llvm/test/Transforms/LICM/invariant.start.ll
+++ b/llvm/test/Transforms/LICM/invariant.start.ll
@@ -1,7 +1,9 @@
-; RUN: opt -licm -basicaa < %s -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -licm -basicaa -licm-n2-threshold=0 < %s -S | FileCheck %s
+; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2
+; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2
-; TODO: should be able to hoist both load and invariant.start
+; TODO: By default (without the -licm-n2-threshold value), we should be able to hoist both load and invariant.start
define void @test1(i1 %cond, i32* %ptr) {
; CHECK-LABEL: @test1(
; CHECK-LABEL: entry:
@@ -9,6 +11,12 @@ define void @test1(i1 %cond, i32* %ptr) {
; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
; CHECK: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: @test1(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
+
entry:
br label %loop
@@ -20,7 +28,7 @@ loop:
br label %loop
}
-;; TODO: despite the loop varying invariant.start, we should be
+;; TODO: By default, despite the loop varying invariant.start, we should be
;; able to hoist the load
define void @test2(i1 %cond, i32* %ptr) {
; CHECK-LABEL: @test2(
@@ -28,7 +36,12 @@ define void @test2(i1 %cond, i32* %ptr) {
; CHECK-LABEL: loop:
; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %piv)
; CHECK: %val = load i32, i32* %ptr
-
+
+; ALIAS-N2-LABEL: @test2(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %piv)
entry:
br label %loop
@@ -41,7 +54,7 @@ loop:
br label %loop
}
-; Should be able to hoist since store doesn't alias
+; By default, should be able to hoist since store doesn't alias
define void @test3(i1 %cond, i32* %ptr) {
; CHECK-LABEL: @test3(
; CHECK-LABEL: entry:
@@ -49,6 +62,11 @@ define void @test3(i1 %cond, i32* %ptr) {
; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
; CHECK: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: @test3(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
entry:
br label %loop
@@ -72,6 +90,12 @@ define void @test4(i1 %cond, i32* %ptr) {
; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
; CHECK: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: @test4(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: store i32 0, i32* %ptr
+; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
+; ALIAS-N2: %val = load i32, i32* %ptr
entry:
br label %loop
@@ -93,6 +117,12 @@ define void @test5(i1 %cond, i32* %ptr) {
; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
; CHECK: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: @test5(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: store i32 0, i32* %ptr
+; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr)
+; ALIAS-N2: %val = load i32, i32* %ptr
entry:
br label %loop
diff --git a/llvm/test/Transforms/LICM/read-only-calls.ll b/llvm/test/Transforms/LICM/read-only-calls.ll
new file mode 100644
index 00000000000..0a378144fb7
--- /dev/null
+++ b/llvm/test/Transforms/LICM/read-only-calls.ll
@@ -0,0 +1,85 @@
+; RUN: opt -S -basicaa -licm -licm-n2-threshold=0 %s | FileCheck %s
+; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2
+; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2
+
+; We should be able to hoist loads in presence of read only calls and stores
+; that do not alias.
+
+; Since LICM uses the AST mechanism for alias analysis, we will clump
+; together all loads and stores in one set along with the read-only call.
+; This prevents hoisting load that doesn't alias with any other memory
+; operations.
+
+declare void @foo(i64, i32*) readonly
+
+; hoist the load out with the n2-threshold
+; since it doesn't alias with the store.
+; default AST mechanism clumps all memory locations in one set because of the
+; readonly call
+define void @test1(i32* %ptr) {
+; CHECK-LABEL: @test1(
+; CHECK-LABEL: entry:
+; CHECK-LABEL: loop:
+; CHECK: %val = load i32, i32* %ptr
+
+; ALIAS-N2-LABEL: @test1(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: loop:
+entry:
+ br label %loop
+
+loop:
+ %x = phi i32 [ 0, %entry ], [ %x.inc, %loop ]
+ %val = load i32, i32* %ptr
+ call void @foo(i64 4, i32* %ptr)
+ %p2 = getelementptr i32, i32* %ptr, i32 1
+ store volatile i32 0, i32* %p2
+ %x.inc = add i32 %x, %val
+ br label %loop
+}
+
+; can hoist out load with the default AST and the alias analysis mechanism.
+define void @test2(i32* %ptr) {
+; CHECK-LABEL: @test2(
+; CHECK-LABEL: entry:
+; CHECK: %val = load i32, i32* %ptr
+; CHECK-LABEL: loop:
+
+; ALIAS-N2-LABEL: @test2(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2: %val = load i32, i32* %ptr
+; ALIAS-N2-LABEL: loop:
+entry:
+ br label %loop
+
+loop:
+ %x = phi i32 [ 0, %entry ], [ %x.inc, %loop ]
+ %val = load i32, i32* %ptr
+ call void @foo(i64 4, i32* %ptr)
+ %x.inc = add i32 %x, %val
+ br label %loop
+}
+
+; cannot hoist load since not guaranteed to execute
+define void @test3(i32* %ptr) {
+; CHECK-LABEL: @test3(
+; CHECK-LABEL: entry:
+; CHECK-LABEL: loop:
+; CHECK: %val = load i32, i32* %ptr
+
+; ALIAS-N2-LABEL: @test3(
+; ALIAS-N2-LABEL: entry:
+; ALIAS-N2-LABEL: loop:
+; ALIAS-N2: %val = load i32, i32* %ptr
+entry:
+ br label %loop
+
+loop:
+ %x = phi i32 [ 0, %entry ], [ %x.inc, %loop ]
+ call void @foo(i64 4, i32* %ptr)
+ %val = load i32, i32* %ptr
+ %x.inc = add i32 %x, %val
+ br label %loop
+}
OpenPOWER on IntegriCloud