Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/Analysis/MemoryLocation.h                |  8
-rw-r--r--  llvm/lib/Analysis/MemoryLocation.cpp                       | 24
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp        | 17
-rw-r--r--  llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll | 10
-rw-r--r--  llvm/test/Transforms/DeadStoreElimination/simple.ll        | 35
5 files changed, 52 insertions(+), 42 deletions(-)
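This change teaches MemoryLocation and DeadStoreElimination to treat the
element-unordered-atomic memory intrinsics (llvm.memcpy/memmove/memset
.element.unordered.atomic.*) like their plain counterparts by dispatching
through the AnyMem* instruction classes. A minimal sketch of that dispatch
idea, assuming the AnyMem* classes from llvm/IR/IntrinsicInst.h (the helper
name writesConstantBytes is hypothetical, not part of this patch):

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Returns true and sets Bytes when I is any mem intrinsic (plain or
// element-unordered-atomic) with a constant length. The dyn_cast succeeds
// for both families because the AnyMem* classof checks the intrinsic ID.
static bool writesConstantBytes(const Instruction *I, uint64_t &Bytes) {
  if (const auto *MI = dyn_cast<AnyMemIntrinsic>(I))
    if (const auto *Len = dyn_cast<ConstantInt>(MI->getLength())) {
      Bytes = Len->getZExtValue();
      return true;
    }
  return false;
}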
diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index c1080742e83..e8f6644462e 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -27,6 +27,10 @@ class LoadInst;
class StoreInst;
class MemTransferInst;
class MemIntrinsic;
+class AtomicMemTransferInst;
+class AtomicMemIntrinsic;
+class AnyMemTransferInst;
+class AnyMemIntrinsic;
class TargetLibraryInfo;
/// Representation for a specific memory location.
@@ -90,10 +94,14 @@ public:
/// Return a location representing the source of a memory transfer.
static MemoryLocation getForSource(const MemTransferInst *MTI);
+ static MemoryLocation getForSource(const AtomicMemTransferInst *MTI);
+ static MemoryLocation getForSource(const AnyMemTransferInst *MTI);
/// Return a location representing the destination of a memory set or
/// transfer.
static MemoryLocation getForDest(const MemIntrinsic *MI);
+ static MemoryLocation getForDest(const AtomicMemIntrinsic *MI);
+ static MemoryLocation getForDest(const AnyMemIntrinsic *MI);
/// Return a location representing a particular argument of a call.
static MemoryLocation getForArgument(ImmutableCallSite CS, unsigned ArgIdx,
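A hedged usage sketch for the new overloads (hypothetical helper; logic is
an assumption, not code from this patch): because AnyMemTransferInst derives
from AnyMemIntrinsic, one dyn_cast covers memcpy/memmove and their
element.unordered.atomic forms, and both the read and written locations can
be fed straight into alias queries.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// True when a (possibly element-atomic) transfer's source and destination
// are known not to alias.
static bool transferOperandsDisjoint(AliasAnalysis &AA, const Instruction *I) {
  const auto *MTI = dyn_cast<AnyMemTransferInst>(I);
  if (!MTI)
    return false;
  MemoryLocation Src = MemoryLocation::getForSource(MTI); // bytes read
  MemoryLocation Dst = MemoryLocation::getForDest(MTI);   // bytes written
  return AA.isNoAlias(Src, Dst);
}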
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index 9db6c499129..55924db284e 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -65,6 +65,14 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
}
MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
+ return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
+ return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
uint64_t Size = UnknownSize;
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
Size = C->getValue().getZExtValue();
@@ -77,17 +85,25 @@ MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
return MemoryLocation(MTI->getRawSource(), Size, AATags);
}
-MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
+MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
+ return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
+ return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
uint64_t Size = UnknownSize;
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
Size = C->getValue().getZExtValue();
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
AAMDNodes AATags;
- MTI->getAAMetadata(AATags);
+ MI->getAAMetadata(AATags);
- return MemoryLocation(MTI->getRawDest(), Size, AATags);
+ return MemoryLocation(MI->getRawDest(), Size, AATags);
}
MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
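One consequence of the implementation above, shown as a small sketch (the
helper is hypothetical, and Size being a plain uint64_t member is an
assumption about this revision of MemoryLocation): the returned location has
a precise size only when the intrinsic's length operand is a ConstantInt;
otherwise it falls back to MemoryLocation::UnknownSize.

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Does getForDest know exactly how many bytes MI writes?
static bool hasExactWriteSize(const AnyMemIntrinsic *MI) {
  return MemoryLocation::getForDest(MI).Size != MemoryLocation::UnknownSize;
}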
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 85f96f59e6d..b3dbe4df7ef 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -160,6 +160,9 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memset_element_unordered_atomic:
case Intrinsic::init_trampoline:
case Intrinsic::lifetime_end:
return true;
@@ -189,7 +192,7 @@ static MemoryLocation getLocForWrite(Instruction *Inst) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return MemoryLocation::get(SI);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+ if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
MemoryLocation Loc = MemoryLocation::getForDest(MI);
return Loc;
@@ -222,7 +225,7 @@ static MemoryLocation getLocForRead(Instruction *Inst,
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+ if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst))
return MemoryLocation::getForSource(MTI);
return MemoryLocation();
}
@@ -249,6 +252,10 @@ static bool isRemovable(Instruction *I) {
case Intrinsic::memcpy:
// Don't remove volatile memory intrinsics.
return !cast<MemIntrinsic>(II)->isVolatile();
+ case Intrinsic::memcpy_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memset_element_unordered_atomic:
+ return true;
}
}
@@ -273,6 +280,7 @@ static bool isShortenableAtTheEnd(Instruction *I) {
case Intrinsic::memcpy:
// Do shorten memory intrinsics.
// FIXME: Add memmove if it's also safe to transform.
+ // TODO: Add atomic memcpy/memset
return true;
}
}
@@ -287,6 +295,7 @@ static bool isShortenableAtTheEnd(Instruction *I) {
static bool isShortenableAtTheBeginning(Instruction *I) {
// FIXME: Handle only memset for now. Supporting memcpy/memmove should be
// easily done by offsetting the source address.
+ // TODO: Handle atomic memory intrinsics
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
return II && II->getIntrinsicID() == Intrinsic::memset;
}
@@ -534,7 +543,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
return false;
- if (isa<MemCpyInst>(Inst)) {
+ if (isa<AnyMemCpyInst>(Inst)) {
// LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
// but in practice memcpy(A <- B) either means that A and B are disjoint or
// are equal (i.e. there are not partial overlaps). Given that, if we have:
@@ -856,8 +865,6 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
LoadedLoc = MemoryLocation::get(L);
} else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
LoadedLoc = MemoryLocation::get(V);
- } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
- LoadedLoc = MemoryLocation::getForSource(MTI);
} else if (!BBI->mayReadFromMemory()) {
// Instruction doesn't read memory. Note that stores that weren't removed
// above will hit this case.
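Restating the removability rule the hunks above encode, as a standalone
sketch (hypothetical helper name; the logic mirrors the patch): plain
memset/memcpy/memmove are removable unless volatile, while the
element-unordered-atomic forms carry no volatile flag and so are always
removable once proven dead. Shortening the atomic forms at either end is
deliberately left as a TODO.

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool isRemovableMemIntrinsic(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::memset:
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    // Don't remove volatile memory intrinsics.
    return !cast<MemIntrinsic>(II)->isVolatile();
  case Intrinsic::memcpy_element_unordered_atomic:
  case Intrinsic::memmove_element_unordered_atomic:
  case Intrinsic::memset_element_unordered_atomic:
    // No volatile flag exists on these; dead ones can always go.
    return true;
  default:
    return false;
  }
}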
diff --git a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
index 00d85f4460d..68943d383ba 100644
--- a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -54,10 +54,6 @@ declare void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* nocapture, i8,
define void @test4() {
; CHECK-LABEL: @test4(
-; CHECK-NEXT: [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT: [[B:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT: store atomic i16 0, i16* [[B]] unordered, align 2
-; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
; CHECK-NEXT: ret void
;
%A = alloca i16, i16 1024, align 2
@@ -73,10 +69,6 @@ define void @test4() {
define void @test5() {
; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT: [[B:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT: store atomic i16 0, i16* [[B]] unordered, align 2
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
; CHECK-NEXT: ret void
;
%A = alloca i16, i16 1024, align 2
@@ -92,8 +84,6 @@ define void @test5() {
define void @test6() {
; CHECK-LABEL: @test6(
-; CHECK-NEXT: [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 [[A]], i8 0, i16 1024, i32 2)
; CHECK-NEXT: ret void
;
%A = alloca i16, i16 1024, align 2
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index 585b7ca28a9..412b563ffb0 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -92,9 +92,8 @@ define void @test6(i32 *%p, i8 *%q) {
; alias).
define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
; CHECK-LABEL: @test6_atomic(
-; CHECK-NEXT: store atomic i32 10, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
-; CHECK-NEXT: store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: ret void
;
store atomic i32 10, i32* %p unordered, align 4 ;; dead.
@@ -121,9 +120,8 @@ define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
; alias).
define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
; CHECK-LABEL: @test7_atomic(
-; CHECK-NEXT: store atomic i32 10, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
-; CHECK-NEXT: store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: ret void
;
store atomic i32 10, i32* %p unordered, align 4 ;; dead.
@@ -292,7 +290,6 @@ define void @test15(i8* %P, i8* %Q) nounwind ssp {
define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test15_atomic(
; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
@@ -300,11 +297,10 @@ define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp {
ret void
}
-; It would only be valid to remove the non-atomic memcpy
+;; Fully dead overwrite of memcpy.
define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test15_atomic_weaker(
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
@@ -312,11 +308,10 @@ define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
ret void
}
-; It would only be valid to remove the non-atomic memcpy
+;; Fully dead overwrite of memcpy.
define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test15_atomic_weaker_2(
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
@@ -338,8 +333,7 @@ define void @test16(i8* %P, i8* %Q) nounwind ssp {
;; Full overwrite of smaller memcpy.
define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test16_atomic(
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
@@ -350,8 +344,7 @@ define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp {
;; Full overwrite of smaller memory where overwrite has stronger atomicity
define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test16_atomic_weaker(
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i1 false)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false)
@@ -362,8 +355,7 @@ define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
;; Full overwrite of smaller memory where overwrite has weaker atomicity.
define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test16_atomic_weaker_2(
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
@@ -385,8 +377,7 @@ define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
;; Overwrite of memset by memcpy.
define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp {
; CHECK-LABEL: @test17_atomic(
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
@@ -398,8 +389,7 @@ define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp {
;; remove the memset.
define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp {
; CHECK-LABEL: @test17_atomic_weaker(
-; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i1 false)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false)
@@ -411,8 +401,7 @@ define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp {
;; the memset.
define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
; CHECK-LABEL: @test17_atomic_weaker_2(
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT: ret void
;
tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)