-rw-r--r--  llvm/lib/Transforms/Scalar/GVN.cpp  |  25
-rw-r--r--  llvm/test/Transforms/GVN/atomic.ll  | 230
2 files changed, 251 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 9565dc8c3dc..116dc69b372 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1219,6 +1219,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
+ assert(LI->isUnordered() && "rules below are incorrect for ordered access");
const DataLayout &DL = LI->getModule()->getDataLayout();
@@ -1227,7 +1228,8 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (Address) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
int Offset =
AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
if (Offset != -1) {
@@ -1244,7 +1246,8 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
int Offset =
AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
@@ -1258,7 +1261,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (Address) {
+ if (Address && !LI->isAtomic()) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, DL);
if (Offset != -1) {
@@ -1304,6 +1307,10 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
LI->getType(), DL))
return false;
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (S->isAtomic() < LI->isAtomic())
+ return false;
+
Res = AvailableValue::get(S->getValueOperand());
return true;
}
@@ -1316,6 +1323,10 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
return false;
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (LD->isAtomic() < LI->isAtomic())
+ return false;
+
Res = AvailableValue::getLoad(LD);
return true;
}
@@ -1587,6 +1598,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
return false;
+ // This code hasn't been audited for atomic, ordered, or volatile memory
+ // access.
+ if (!LI->isSimple())
+ return false;
+
// Step 1: Find the non-local dependencies of the load.
LoadDepVect Deps;
MD->getNonLocalPointerDependency(LI, Deps);
@@ -1755,7 +1771,8 @@ bool GVN::processLoad(LoadInst *L) {
if (!MD)
return false;
- if (!L->isSimple())
+ // This code hasn't been audited for ordered or volatile memory access
+ if (!L->isUnordered())
return false;
if (L->use_empty()) {
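
Background on the two predicates used in the guards above (not part of the patch): for a LoadInst, isSimple() means neither atomic nor volatile, while isUnordered() also admits atomic loads whose ordering is no stronger than unordered. A minimal sketch under that assumption; the helper name is hypothetical and only illustrates how the non-local and local paths now differ:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper sketching the two guards: processNonLocalLoad still
// requires a fully simple load, while processLoad now only requires the load
// to be unordered (volatile and ordered accesses remain untouched).
static const char *gvnEligibility(const LoadInst &LI) {
  if (LI.isSimple())     // not volatile, not atomic
    return "local and non-local GVN";
  if (LI.isUnordered())  // not volatile; atomic but no stronger than 'unordered'
    return "local GVN only (after this patch)";
  return "skipped: ordered or volatile access";
}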
diff --git a/llvm/test/Transforms/GVN/atomic.ll b/llvm/test/Transforms/GVN/atomic.ll
index 11b54f39756..4429e9bbad0 100644
--- a/llvm/test/Transforms/GVN/atomic.ll
+++ b/llvm/test/Transforms/GVN/atomic.ll
@@ -107,3 +107,233 @@ if.end:
; CHECK: load i32, i32* @y, align 4
ret i32 %load
}
+
+; CHECK-LABEL: @test12(
+; Can't remove a load across an ordering barrier
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+ %load0 = load i32, i32* %P1
+ %1 = load atomic i32, i32* %P2 seq_cst, align 4
+ %load1 = load i32, i32* %P1
+ %sel = select i1 %B, i32 %load0, i32 %load1
+ ret i32 %sel
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test13b(
+define i32 @test13b(i32* %P1) {
+ store atomic i32 0, i32* %P1 unordered, align 4
+ %b = load i32, i32* %P1
+ ret i32 %b
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1 seq_cst
+ ; CHECK-NEXT: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to stronger
+; than unordered
+define i32 @test15(i32* %P1, i32* %P2) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16b(
+define i32 @test16b(i32* %P1) {
+ store i32 0, i32* %P1
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ ret i32 %b
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a seq_cst store (it acts as a full fence)
+define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_store(
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ store atomic i32 0, i32* %P2 seq_cst, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst(
+; CHECK: store
+; CHECK: fence seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full singlethread fence
+define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_st(
+; CHECK: store
+; CHECK: fence singlethread seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence singlethread seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across an asm statement with side effects
+define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_sideeffect(
+; CHECK: store
+; CHECK: call void asm sideeffect
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm sideeffect "", ""()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across an asm memory clobber
+define void @fence_asm_memory(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_memory(
+; CHECK: store
+; CHECK: call void asm
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm "", "~{memory}"()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove a volatile load
+define i32 @volatile_load(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @volatile_load(
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't remove redundant volatile loads
+define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @redundant_volatile_load(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: sub
+}
+
+; Can't DSE a volatile store
+define void @volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_store(
+; CHECK: store volatile
+; CHECK: store
+ store volatile i32 0, i32* %P1, align 4
+ store i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE a redundant volatile store
+define void @redundant_volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_store(
+; CHECK: store volatile
+; CHECK: store volatile
+ store volatile i32 0, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can value forward from volatiles
+define i32 @test20(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test20(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; We're currently conservative about widening
+define i64 @widen1(i32* %P1) {
+ ; CHECK-LABEL: @widen1(
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i64, i64* %p2
+ %p2 = bitcast i32* %P1 to i64*
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %b = load atomic i64, i64* %p2 unordered, align 4
+ %a64 = sext i32 %a to i64
+ %res = sub i64 %a64, %b
+ ret i64 %res
+}
+
+; narrowing does work
+define i64 @narrow(i32* %P1) {
+ ; CHECK-LABEL: @narrow(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK-NOT: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 unordered, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+
+; Missed optimization, we don't yet optimize ordered loads
+define i64 @narrow2(i32* %P1) {
+ ; CHECK-LABEL: @narrow2(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 acquire, align 4
+ %b = load atomic i32, i32* %P1 acquire, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+
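
The GVN.cpp checks and the tests above encode a single direction rule: a load may reuse a value only from a dependency that is at least as atomic as the load itself (test13/test14 are legal, test16/test16b are not). A minimal sketch of that rule, assuming the standard LLVM instruction classes; the helper name is hypothetical and not part of the patch:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Comparing the two bools with <= rejects forwarding exactly when the load is
// atomic (true) but the dependency is not (false); every other combination,
// including atomic -> non-atomic, stays allowed.
static bool orderingAllowsForwarding(const LoadInst &LI, const Instruction &Dep) {
  if (auto *S = dyn_cast<StoreInst>(&Dep))
    return LI.isAtomic() <= S->isAtomic();
  if (auto *L = dyn_cast<LoadInst>(&Dep))
    return LI.isAtomic() <= L->isAtomic();
  if (isa<MemIntrinsic>(&Dep))
    return !LI.isAtomic(); // memset/memcpy/memmove are never atomic
  return false;            // other dependencies are handled elsewhere in GVN
}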