diff options
| author | Nirav Dave <niravd@google.com> | 2018-01-09 15:23:12 +0000 |
|---|---|---|
| committer | Nirav Dave <niravd@google.com> | 2018-01-09 15:23:12 +0000 |
| commit | 30304a3bd720972071debe43fdbe32befbd9fdea (patch) | |
| tree | 96047a2e5c51d1ac29eb6203010f13602e9d8789 | |
| parent | 5732c0e6ead6aba10b6403ee3ca17df469fd0bd6 (diff) | |
| download | bcm5719-llvm-30304a3bd720972071debe43fdbe32befbd9fdea.tar.gz bcm5719-llvm-30304a3bd720972071debe43fdbe32befbd9fdea.zip | |
[DAG] Elide overlapping stores
Relanding after fixing handling of pre-indexed memory operations in
BaseIndexOffset analysis (r322003).
Extend overlapping store elision to handle overwrites of stores by
larger stores.
Reviewers: craig.topper, rnk, t.p.northover
Subscribers: javed.absar, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D40969
llvm-svn: 322085
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll | 5 |
2 files changed, 22 insertions, 24 deletions
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 58b1dd92b33..f229be15c5d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13798,30 +13798,29 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     }
   }
 
-  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
-    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
-        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
-        ST->getMemoryVT() == ST1->getMemoryVT()) {
-      // If this is a store followed by a store with the same value to the same
-      // location, then the store is dead/noop.
-      if (ST1->getValue() == Value) {
-        // The store is dead, remove it.
-        return Chain;
-      }
-
-      // If this is a store who's preceeding store to the same location
-      // and no one other node is chained to that store we can effectively
-      // drop the store. Do not remove stores to undef as they may be used as
-      // data sinks.
-      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
-          !ST1->getBasePtr().isUndef()) {
-        // ST1 is fully overwritten and can be elided. Combine with it's chain
-        // value.
+  // Deal with elidable overlapping chained stores.
+  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
+    if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
+        ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
+        !ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
+      BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST, DAG);
+      BaseIndexOffset ST1BasePtr = BaseIndexOffset::match(ST1, DAG);
+      unsigned STBytes = ST->getMemoryVT().getStoreSize();
+      unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
+      int64_t PtrDiff;
+      // If this is a store who's preceeding store to a subset of the same
+      // memory and no one other node is chained to that store we can
+      // effectively drop the store. Do not remove stores to undef as they may
+      // be used as data sinks.
+
+      if (((ST->getBasePtr() == ST1->getBasePtr()) &&
+           (ST->getValue() == ST1->getValue())) ||
+          (STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
+           (0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
         CombineTo(ST1, ST1->getChain());
-        return SDValue();
+        return SDValue(N, 0);
       }
     }
-  }
 
   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
   // truncating store. We can do this even if this is already a truncstore.
diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
index 9c698b5fdcc..9b0b51d369a 100644
--- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
@@ -10,11 +10,10 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
 define i32 @main() local_unnamed_addr #1 {
 ; Make sure the stores happen in the correct order (the exact instructions could change).
 ; CHECK-LABEL: main:
-; CHECK: stp xzr, xzr, [sp, #72]
+; CHECK: str xzr, [sp, #80]
 ; CHECK: str w9, [sp, #80]
-; CHECK: str q0, [sp, #48]
+; CHECK: stp q0, q0, [sp, #48]
 ; CHECK: ldr w8, [sp, #48]
-; CHECK: str q0, [sp, #64]
 
 for.body.lr.ph.i.i.i.i.i.i63:
   %b1 = alloca [10 x i32], align 16
```

