diff options
author | Arnaud A. de Grandmaison <arnaud.degrandmaison@arm.com> | 2015-10-07 07:41:29 +0000 |
---|---|---|
committer | Arnaud A. de Grandmaison <arnaud.degrandmaison@arm.com> | 2015-10-07 07:41:29 +0000 |
commit | a6178a179d32e8dac6c0a8600e05cda69c32f4c7 (patch) | |
tree | b4ac361bdaaaa8d05c4cfe1fea5c820ad1eacd6f | |
parent | 259f1508f08f662eb2ecfab5155cd92e0c03eff8 (diff) | |
download | bcm5719-llvm-a6178a179d32e8dac6c0a8600e05cda69c32f4c7.tar.gz bcm5719-llvm-a6178a179d32e8dac6c0a8600e05cda69c32f4c7.zip |
[EarlyCSE] Fix handling of target memory intrinsics for CSE'ing loads.
Summary:
Some target intrinsics can access multiple elements, using the pointer as a
base address (e.g. AArch64 ld4). When trying to CSE such instructions,
it must be checked the available value comes from a compatible instruction
because the pointer is not enough to discriminate whether the value is
correct.
Reviewers: ssijaric
Subscribers: mcrosier, llvm-commits, aemerson
Differential Revision: http://reviews.llvm.org/D13475
llvm-svn: 249523
-rw-r--r-- | llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 37 | ||||
-rw-r--r-- | llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll | 18 |
2 files changed, 41 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 9ac3c2cdab2..008b0e5ffb3 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -290,12 +290,19 @@ public: /// current generation count. The current generation count is incremented /// after every possibly writing memory operation, which ensures that we only /// CSE loads with other loads that have no intervening store. - typedef RecyclingAllocator< - BumpPtrAllocator, - ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>> + struct LoadValue { + Value *data; + unsigned generation; + int matchingId; + LoadValue() : data(nullptr), generation(0), matchingId(-1) {} + LoadValue(Value *data, unsigned generation, unsigned matchingId) + : data(data), generation(generation), matchingId(matchingId) {} + }; + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<Value *, LoadValue>> LoadMapAllocator; - typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>, - DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType; + typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>, + LoadMapAllocator> LoadHTType; LoadHTType AvailableLoads; /// \brief A scoped hash table of the current values of read-only call @@ -560,13 +567,13 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If we have an available version of this load, and if it is the right // generation, replace this instruction. - std::pair<Value *, unsigned> InVal = - AvailableLoads.lookup(MemInst.getPtr()); - if (InVal.first != nullptr && InVal.second == CurrentGeneration) { - Value *Op = getOrCreateResult(InVal.first, Inst->getType()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr()); + if (InVal.data != nullptr && InVal.generation == CurrentGeneration && + InVal.matchingId == MemInst.getMatchingId()) { + Value *Op = getOrCreateResult(InVal.data, Inst->getType()); if (Op != nullptr) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst - << " to: " << *InVal.first << '\n'); + << " to: " << *InVal.data << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(Op); Inst->eraseFromParent(); @@ -577,8 +584,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { } // Otherwise, remember that we have this instruction. - AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPtr(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); LastStore = nullptr; continue; } @@ -652,8 +660,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // version of the pointer. It is safe to forward from volatile stores // to non-volatile loads, so we don't have to check for volatility of // the store. - AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPtr(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); // Remember that this was the last store we saw for DSE. if (!MemInst.isVolatile()) diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll b/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll new file mode 100644 index 00000000000..cc1af31429e --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -early-cse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>*)
+
+; Although the store and the ld4 are using the same pointer, the
+; data can not be reused because ld4 accesses multiple elements.
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @foo() {
+entry:
+ store <4 x i16> undef, <4 x i16>* undef, align 8
+ %0 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* undef)
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %0
+; CHECK-LABEL: @foo(
+; CHECK: store
+; CHECK-NEXT: call
+; CHECK-NEXT: ret
+}
|