summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h10
-rw-r--r--llvm/lib/Analysis/MemoryDependenceAnalysis.cpp63
-rw-r--r--llvm/test/Transforms/GVN/assume-equal.ll8
-rw-r--r--llvm/test/Transforms/GVN/invariant.group.ll38
-rw-r--r--llvm/test/Transforms/NewGVN/assume-equal.ll8
-rw-r--r--llvm/test/Transforms/NewGVN/invariant.group.ll39
6 files changed, 145 insertions, 21 deletions
diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 33dbd22f7a2..a401887016c 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -302,6 +302,10 @@ private:
NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {}
};
+ /// Cache storing single nonlocal def for the instruction.
+ /// It is set when nonlocal def would be found in function returning only
+ /// local dependencies.
+ DenseMap<Instruction *, NonLocalDepResult> NonLocalDefsCache;
/// This map stores the cached results of doing a pointer lookup at the
/// bottom of a block.
///
@@ -441,9 +445,9 @@ public:
/// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not
/// find anything, and Def if it can be assumed that 2 instructions load or
- /// store the same value.
- /// FIXME: This analysis works only on single block because of restrictions
- /// at the call site.
+ /// store the same value and NonLocal which indicate that non-local Def was
+ /// found, which can be retrieved by calling getNonLocalPointerDependency
+ /// with the same queried instruction.
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB);
/// Looks at a memory location for a load (specified by MemLocBase, Offs, and
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index acdf14733b0..66a0d145dcd 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -323,17 +323,28 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
- MemDepResult InvariantGroupDependency =
- getInvariantGroupPointerDependency(LI, BB);
+ InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
if (InvariantGroupDependency.isDef())
return InvariantGroupDependency;
}
}
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
- Limit);
+ MemDepResult SimpleDep = getSimplePointerDependencyFrom(
+ MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
+ if (SimpleDep.isDef())
+ return SimpleDep;
+ // Non-local invariant group dependency indicates there is non local Def
+ // (it only returns nonLocal if it finds nonLocal def), which is better than
+ // local clobber and everything else.
+ if (InvariantGroupDependency.isNonLocal())
+ return InvariantGroupDependency;
+
+ assert(InvariantGroupDependency.isUnknown() &&
+ "InvariantGroupDependency should be only unknown at this point");
+ return SimpleDep;
}
MemDepResult
@@ -358,6 +369,20 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// Queue to process all pointers that are equivalent to load operand.
SmallVector<const Value *, 8> LoadOperandsQueue;
LoadOperandsQueue.push_back(LoadOperand);
+
+ Instruction *ClosestDependency = nullptr;
+ // Order of instructions in uses list is unpredictible. In order to always
+ // get the same result, we will look for the closest dominance.
+ auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
+ assert(Other && "Must call it with not null instruction");
+ if (Best == nullptr || DT.dominates(Best, Other))
+ return Other;
+ return Best;
+ };
+
+
+ // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
+ // we will see all the instructions. This should be fixed in MSSA.
while (!LoadOperandsQueue.empty()) {
const Value *Ptr = LoadOperandsQueue.pop_back_val();
assert(Ptr && !isa<GlobalValue>(Ptr) &&
@@ -388,12 +413,24 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
- if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
+ if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
- return MemDepResult::getDef(U);
+ ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
}
- return MemDepResult::getUnknown();
+
+ if (!ClosestDependency)
+ return MemDepResult::getUnknown();
+ if (ClosestDependency->getParent() == BB)
+ return MemDepResult::getDef(ClosestDependency);
+ // Def(U) can't be returned here because it is non-local. If local
+ // dependency won't be found then return nonLocal counting that the
+ // user will call getNonLocalPointerDependency, which will return cached
+ // result.
+ NonLocalDefsCache.try_emplace(
+ LI, NonLocalDepResult(ClosestDependency->getParent(),
+ MemDepResult::getDef(ClosestDependency), nullptr));
+ return MemDepResult::getNonLocal();
}
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
@@ -877,7 +914,17 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
-
+ {
+ // Check if there is cached Def with invariant.group. FIXME: cache might be
+ // invalid if cached instruction would be removed between call to
+ // getPointerDependencyFrom and this function.
+ auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
+ if (NonLocalDefIt != NonLocalDefsCache.end()) {
+ Result.push_back(std::move(NonLocalDefIt->second));
+ NonLocalDefsCache.erase(NonLocalDefIt);
+ return;
+ }
+ }
// This routine does not expect to deal with volatile instructions.
// Doing so would require piping through the QueryInst all the way through.
// TODO: volatiles can't be elided, but they can be reordered with other
diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll
index d423c1685e1..941f14ce402 100644
--- a/llvm/test/Transforms/GVN/assume-equal.ll
+++ b/llvm/test/Transforms/GVN/assume-equal.ll
@@ -65,22 +65,20 @@ if.then: ; preds = %entry
%vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
%call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
-; FIXME: those loads could be also direct, but right now the invariant.group
-; analysis works only on single block
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%callx = tail call i32 %call1(%struct.A* %0) #1
%vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
%call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%cally = tail call i32 %call4(%struct.A* %0) #1
%b = bitcast i8* %call to %struct.A**
%vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
%vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
%vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%unknown = tail call i32 %vfun(%struct.A* %0) #1
br label %if.end
diff --git a/llvm/test/Transforms/GVN/invariant.group.ll b/llvm/test/Transforms/GVN/invariant.group.ll
index d0b32d7f3dd..6f1f357cad6 100644
--- a/llvm/test/Transforms/GVN/invariant.group.ll
+++ b/llvm/test/Transforms/GVN/invariant.group.ll
@@ -392,6 +392,44 @@ define void @testNotGlobal() {
ret void
}
+; CHECK-LABEL: define void @handling_loops()
+define void @handling_loops() {
+ %a = alloca %struct.A, align 8
+ %1 = bitcast %struct.A* %a to i8*
+ %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+ store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
+ %3 = load i8, i8* @unknownPtr, align 4
+ %4 = icmp sgt i8 %3, 0
+ br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit
+
+.lr.ph.i: ; preds = %0
+ %5 = bitcast %struct.A* %a to void (%struct.A*)***
+ %6 = load i8, i8* @unknownPtr, align 4
+ %7 = icmp sgt i8 %6, 1
+ br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit
+
+._crit_edge.preheader: ; preds = %.lr.ph.i
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge
+ %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
+ %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
+ %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
+ ; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+ call void %9(%struct.A* nonnull %a) #3
+ ; CHECK-NOT: call void %
+ %10 = add nuw nsw i8 %8, 1
+ %11 = load i8, i8* @unknownPtr, align 4
+ %12 = icmp slt i8 %10, %11
+ br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit
+
+_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge
+ br label %_Z2g2R1A.exit
+
+_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0
+ ret void
+}
+
declare void @foo(i8*)
declare void @foo2(i8*, i8)
diff --git a/llvm/test/Transforms/NewGVN/assume-equal.ll b/llvm/test/Transforms/NewGVN/assume-equal.ll
index b6c2a7afb29..7e009192064 100644
--- a/llvm/test/Transforms/NewGVN/assume-equal.ll
+++ b/llvm/test/Transforms/NewGVN/assume-equal.ll
@@ -66,22 +66,20 @@ if.then: ; preds = %entry
%vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
%call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
-; FIXME: those loads could be also direct, but right now the invariant.group
-; analysis works only on single block
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%callx = tail call i32 %call1(%struct.A* %0) #1
%vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
%call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%cally = tail call i32 %call4(%struct.A* %0) #1
%b = bitcast i8* %call to %struct.A**
%vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
%vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
%vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%unknown = tail call i32 %vfun(%struct.A* %0) #1
br label %if.end
diff --git a/llvm/test/Transforms/NewGVN/invariant.group.ll b/llvm/test/Transforms/NewGVN/invariant.group.ll
index 80c6e05a8e2..c421df6bd3b 100644
--- a/llvm/test/Transforms/NewGVN/invariant.group.ll
+++ b/llvm/test/Transforms/NewGVN/invariant.group.ll
@@ -393,6 +393,45 @@ define void @testNotGlobal() {
ret void
}
+; CHECK-LABEL: define void @handling_loops()
+define void @handling_loops() {
+ %a = alloca %struct.A, align 8
+ %1 = bitcast %struct.A* %a to i8*
+ %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+ store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
+ %3 = load i8, i8* @unknownPtr, align 4
+ %4 = icmp sgt i8 %3, 0
+ br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit
+
+.lr.ph.i: ; preds = %0
+ %5 = bitcast %struct.A* %a to void (%struct.A*)***
+ %6 = load i8, i8* @unknownPtr, align 4
+ %7 = icmp sgt i8 %6, 1
+ br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit
+
+._crit_edge.preheader: ; preds = %.lr.ph.i
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge
+ %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
+ %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
+ %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
+; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+ call void %9(%struct.A* nonnull %a) #3
+
+; CHECK-NOT: call void %
+ %10 = add nuw nsw i8 %8, 1
+ %11 = load i8, i8* @unknownPtr, align 4
+ %12 = icmp slt i8 %10, %11
+ br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit
+
+_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge
+ br label %_Z2g2R1A.exit
+
+_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0
+ ret void
+}
+
declare void @foo(i8*)
declare void @foo2(i8*, i8)
OpenPOWER on IntegriCloud