author     Philip Reames <listmail@philipreames.com>   2015-12-16 01:01:30 +0000
committer  Philip Reames <listmail@philipreames.com>   2015-12-16 01:01:30 +0000
commit     ae1f265bf19858ada9c9b6761fba7f8e1f9e395a (patch)
tree       cde415aaca28e72cf9fdfab809176853ad4b8d85
parent     e2e332a50eee8adc091fab85514649529bc18db2 (diff)
[EarlyCSE] DSE of stores which write back loaded values
Extend EarlyCSE with an additional style of dead store elimination. If we write back a value just read from that memory location, we can eliminate the store under the assumption that the value hasn't changed.

I'm implementing this mostly because I noticed the omission when looking at the code. It seemed strange to have InstCombine have a peephole which was more powerful than EarlyCSE. :)

Differential Revision: http://reviews.llvm.org/D15397

llvm-svn: 255739
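In IR terms, the transform deletes a store whose stored value is exactly the value just loaded from the same address, with no intervening write or ordering operation. A minimal sketch of the pattern (it mirrors the @dse1 test added below; the function name is illustrative only):

define void @writeback(i32* %P) {
  %v = load i32, i32* %P
  store i32 %v, i32* %P   ; dead: writes back the value just loaded, so EarlyCSE can drop it
  ret void
}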
-rw-r--r--  llvm/lib/Transforms/Scalar/EarlyCSE.cpp  27
-rw-r--r--  llvm/test/Transforms/EarlyCSE/basic.ll   74
2 files changed, 101 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 6fa194e5709..03f8c05c9d3 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -687,6 +687,33 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       continue;
     }
 
+    // write back DSE - If we write back the same value we just loaded from
+    // the same location and haven't passed any intervening writes or ordering
+    // operations, we can remove the write. The primary benefit is in allowing
+    // the available load table to remain valid and value forward past where
+    // the store originally was.
+    if (MemInst.isValid() && MemInst.isStore()) {
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.Data &&
+          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+          InVal.Generation == CurrentGeneration &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing stores with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered()) {
+        assert((!LastStore ||
+                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+                MemInst.getPointerOperand()) &&
+               "can't have an intervening store!");
+        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumDSE;
+        // We can avoid incrementing the generation count since we were able
+        // to eliminate this store.
+        continue;
+      }
+    }
+
     // Okay, this isn't something we can CSE at all. Check to see if it is
     // something that could modify memory. If so, our available memory values
     // cannot be used so bump the generation count.
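The comment about keeping the available-load table valid is the real payoff: because the dead store is erased and the loop continues without bumping the memory generation, loads from other pointers stay available across the point where the store used to be. A small illustrative sketch, under the same assumption the @dse4 and @dse5 tests below check (value names here are only for illustration):

  %a = load i32, i32* %Q
  %v = load i32, i32* %P
  store i32 %v, i32* %P    ; removed by the new write-back DSE
  %b = load i32, i32* %Q   ; with the store gone, %b can be CSE'd to %a

Without the elimination, the retained store would conservatively bump the generation, so this pass could not have forwarded %a to %b.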
diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll
index 43b5e6098f6..8c9b74b4d0e 100644
--- a/llvm/test/Transforms/EarlyCSE/basic.ll
+++ b/llvm/test/Transforms/EarlyCSE/basic.ll
@@ -203,3 +203,77 @@ define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
 ; CHECK: load i32, i32* %P1
 ; CHECK: load i32, i32* %P1
 }
+
+define void @dse1(i32 *%P) {
+; CHECK-LABEL: @dse1
+; CHECK-NOT: store
+  %v = load i32, i32* %P
+  store i32 %v, i32* %P
+  ret void
+}
+
+define void @dse2(i32 *%P) {
+; CHECK-LABEL: @dse2
+; CHECK-NOT: store
+  %v = load atomic i32, i32* %P seq_cst, align 4
+  store i32 %v, i32* %P
+  ret void
+}
+
+define void @dse3(i32 *%P) {
+; CHECK-LABEL: @dse3
+; CHECK-NOT: store
+  %v = load atomic i32, i32* %P seq_cst, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  ret void
+}
+
+define i32 @dse4(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse4
+; CHECK-NOT: store
+; CHECK: ret i32 0
+  %a = load i32, i32* %Q
+  %v = load atomic i32, i32* %P unordered, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  %b = load i32, i32* %Q
+  %res = sub i32 %a, %b
+  ret i32 %res
+}
+
+; Note that in this example, %P and %Q could in fact be the same
+; pointer. %v could be different than the value observed for %a
+; and that's okay because we're using relaxed memory ordering.
+; The only guarantee we have to provide is that each of the loads
+; has to observe some value written to that location. We do
+; not have to respect the order in which those writes were done.
+define i32 @dse5(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse5
+; CHECK-NOT: store
+; CHECK: ret i32 0
+  %v = load atomic i32, i32* %P unordered, align 4
+  %a = load atomic i32, i32* %Q unordered, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  %b = load atomic i32, i32* %Q unordered, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+}
+
+
+define void @dse_neg1(i32 *%P) {
+; CHECK-LABEL: @dse_neg1
+; CHECK: store
+  %v = load i32, i32* %P
+  store i32 5, i32* %P
+  ret void
+}
+
+; Could remove the store, but only if ordering was somehow
+; encoded.
+define void @dse_neg2(i32 *%P) {
+; CHECK-LABEL: @dse_neg2
+; CHECK: store
+  %v = load i32, i32* %P
+  store atomic i32 %v, i32* %P seq_cst, align 4
+  ret void
+}
+