summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSanjoy Das <sanjoy@playingwithpointers.com>2018-02-20 23:19:34 +0000
committerSanjoy Das <sanjoy@playingwithpointers.com>2018-02-20 23:19:34 +0000
commit737fa40ffad15482801cc60271aed40dd5de9c95 (patch)
tree66826396d645129f6ea4426e8ea4826f3985a9ef
parent6cd4861c22ef078c2a043ac6ffb1a4467dd85534 (diff)
downloadbcm5719-llvm-737fa40ffad15482801cc60271aed40dd5de9c95.tar.gz
bcm5719-llvm-737fa40ffad15482801cc60271aed40dd5de9c95.zip
[DSE] Don't DSE stores that subsequent memmove calls read from
Summary: We used to remove the first memmove in cases like this: memmove(p, p+2, 8); memmove(p, p+2, 8); which is incorrect. Fix this by changing isPossibleSelfRead to what was most likely the intended behavior. Historical note: the buggy code was added in https://reviews.llvm.org/rL120974 to address PR8728. Reviewers: rsmith Subscribers: mcrosier, llvm-commits, jlebar Differential Revision: https://reviews.llvm.org/D43425 llvm-svn: 325641
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp43
-rw-r--r--llvm/test/Transforms/DeadStoreElimination/simple.ll51
2 files changed, 78 insertions, 16 deletions
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e6f0bb21dd5..78bfa0ef641 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -510,8 +510,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense. Consider this case:
///
-/// memcpy(A <- B)
-/// memcpy(A <- A)
+/// memmove(A <- B)
+/// memmove(A <- A)
///
/// In this case, the second store to A does not make the first store to A dead.
/// The usual situation isn't an explicit A<-A store like this (which can be
@@ -527,24 +527,35 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Self reads can only happen for instructions that read memory. Get the
// location read.
MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
- if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
+ if (!InstReadLoc.Ptr)
+ return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
- if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
-
- // Okay, 'Inst' may copy over itself. However, we can still remove a the
- // DepWrite instruction if we can prove that it reads from the same location
- // as Inst. This handles useful cases like:
- // memcpy(A <- B)
- // memcpy(A <- B)
- // Here we don't know if A/B may alias, but we do know that B/B are must
- // aliases, so removing the first memcpy is safe (assuming it writes <= #
- // bytes as the second one.
- MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
-
- if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
return false;
+ if (isa<MemCpyInst>(Inst)) {
+ // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
+ // but in practice memcpy(A <- B) either means that A and B are disjoint or
+ // are equal (i.e. there are not partial overlaps). Given that, if we have:
+ //
+ // memcpy/memmove(A <- B) // DepWrite
+ // memcpy(A <- B) // Inst
+ //
+ // with Inst reading/writing a >= size than DepWrite, we can reason as
+ // follows:
+ //
+ // - If A == B then both the copies are no-ops, so the DepWrite can be
+ // removed.
+ // - If A != B then A and B are disjoint locations in Inst. Since
+ // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too.
+ // Therefore DepWrite can be removed.
+ MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
+
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
+ }
+
// If DepWrite doesn't read memory or if we can't prove it is a must alias,
// then it can't be considered dead.
return true;
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index 6130fbbf807..604621130e0 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -252,6 +252,9 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp {
; Do not delete instruction where possible situation is:
; A = B
; A = A
+;
+; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be
+; equal (but not inequal and overlapping).
define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
@@ -521,3 +524,51 @@ define void @test35(i32* noalias %p) {
store i32 0, i32* %p
ret void
}
+
+; We cannot optimize away the first memmove since %P could overlap with %Q.
+define void @test36(i8* %P, i8* %Q) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ ret void
+}
+
+define void @test37(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+ ret void
+}
+
+; Same caveat about memcpy as in @test18 applies here.
+define void @test38(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+ ret void
+}
+
+define void @test39(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
OpenPOWER on IntegriCloud