summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmaury Sechet <deadalnix@gmail.com>2016-03-14 22:52:27 +0000
committerAmaury Sechet <deadalnix@gmail.com>2016-03-14 22:52:27 +0000
commitbdb261b4c01b0bdd3fa22b60ffbe1318ab906630 (patch)
tree1313001e959433c0b87fe7cd419ef6ec9616a779
parentf50e1d8358103ef158d4ee6f9263e673d7515631 (diff)
downloadbcm5719-llvm-bdb261b4c01b0bdd3fa22b60ffbe1318ab906630.tar.gz
bcm5719-llvm-bdb261b4c01b0bdd3fa22b60ffbe1318ab906630.zip
Imporove load to store => memcpy
Summary: This now try to reorder instructions in order to help create the optimizable pattern. Reviewers: craig.topper, spatel, dexonsmith, Prazek, chandlerc, joker.eph, majnemer Differential Revision: http://reviews.llvm.org/D16523 llvm-svn: 263503
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp117
-rw-r--r--llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll36
2 files changed, 128 insertions, 25 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3d84c3c9b1d..26359f98ed4 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -493,6 +494,92 @@ static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
return std::min(StoreAlign, LoadAlign);
}
+// This method try to lift a store instruction before position P.
+// It will lift the store and its argument + that anything that
+// lay alias with these.
+// The method returns true if it was successful.
+static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P) {
+ // If the store alias this position, early bail out.
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
+ if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef)
+ return false;
+
+ // Keep track of the arguments of all instruction we plan to lift
+ // so we can make sure to lift them as well if apropriate.
+ DenseSet<Instruction*> Args;
+ if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
+ if (Ptr->getParent() == SI->getParent())
+ Args.insert(Ptr);
+
+ // Instruction to lift before P.
+ SmallVector<Instruction*, 8> ToLift;
+
+ // Memory locations of lifted instructions.
+ SmallVector<MemoryLocation, 8> MemLocs;
+ MemLocs.push_back(StoreLoc);
+
+ // Lifted callsites.
+ SmallVector<ImmutableCallSite, 8> CallSites;
+
+ for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
+ auto *C = &*I;
+
+ bool MayAlias = AA.getModRefInfo(C) != MRI_NoModRef;
+
+ bool NeedLift = false;
+ if (Args.erase(C))
+ NeedLift = true;
+ else if (MayAlias) {
+ NeedLift = std::any_of(MemLocs.begin(), MemLocs.end(),
+ [C, &AA](const MemoryLocation &ML) {
+ return AA.getModRefInfo(C, ML);
+ });
+
+ if (!NeedLift)
+ NeedLift = std::any_of(CallSites.begin(), CallSites.end(),
+ [C, &AA](const ImmutableCallSite &CS) {
+ return AA.getModRefInfo(C, CS);
+ });
+ }
+
+ if (!NeedLift)
+ continue;
+
+ if (MayAlias) {
+ if (auto CS = ImmutableCallSite(C)) {
+ // If we can't lift this before P, it's game over.
+ if (AA.getModRefInfo(P, CS) != MRI_NoModRef)
+ return false;
+
+ CallSites.push_back(CS);
+ } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
+ // If we can't lift this before P, it's game over.
+ auto ML = MemoryLocation::get(C);
+ if (AA.getModRefInfo(P, ML) != MRI_NoModRef)
+ return false;
+
+ MemLocs.push_back(ML);
+ } else
+ // We don't know how to lift this instruction.
+ return false;
+ }
+
+ ToLift.push_back(C);
+ for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
+ if (auto *A = dyn_cast<Instruction>(C->getOperand(k)))
+ if (A->getParent() == SI->getParent())
+ Args.insert(A);
+ }
+
+ // We made it, we need to lift
+ for (auto *I : reverse(ToLift)) {
+ DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
+ I->moveBefore(P);
+ }
+
+ return true;
+}
+
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
@@ -522,26 +609,20 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// such an instruction is found, we try to promote there instead
// of at the store position.
Instruction *P = SI;
- for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
- I != E; ++I) {
- if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod))
- continue;
-
- // We found an instruction that may write to the loaded memory.
- // We can try to promote at this position instead of the store
- // position if nothing alias the store memory after this and the store
- // destination is not in the range.
- P = &*I;
- for (; I != E; ++I) {
- MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (&*I == SI->getOperand(1) ||
- AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
- P = nullptr;
- break;
- }
+ for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
+ if (AA.getModRefInfo(&I, LoadLoc) & MRI_Mod) {
+ P = &I;
+ break;
}
+ }
- break;
+ // We found an instruction that may write to the loaded memory.
+ // We can try to promote at this position instead of the store
+ // position if nothing alias the store memory after this and the store
+ // destination is not in the range.
+ if (P && P != SI) {
+ if (!moveUp(AA, SI, P))
+ P = nullptr;
}
// If a valid insertion position is found, then we can promote
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index c8a126848b0..0215431ac35 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -73,16 +73,38 @@ define void @copyalias(%S* %src, %S* %dst) {
ret void
}
-
-; The GEP is present after the aliasing store, preventing to move the memcpy before
-; (without further analysis/transformation)
-define void @copyaliaswithproducerinbetween(%S* %src, %S* %dst) {
-; CHECK-LABEL: copyalias
-; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src
-; CHECK-NOT: call
+; If the store address is computed ina complex manner, make
+; sure we lift the computation as well if needed and possible.
+define void @addrproducer(%S* %src, %S* %dst) {
+; CHECK-LABEL: addrproducer
+; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: store %S undef, %S* %dst
+; CHECK-NEXT: ret void
%1 = load %S, %S* %src
store %S undef, %S* %dst
%dst2 = getelementptr %S , %S* %dst, i64 1
store %S %1, %S* %dst2
ret void
}
+
+define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
+; CHECK-LABEL: aliasaddrproducer
+ %1 = load %S, %S* %src
+ store %S undef, %S* %dst
+ %dstindex = load i32, i32* %dstidptr
+ %dst2 = getelementptr %S , %S* %dst, i32 %dstindex
+ store %S %1, %S* %dst2
+ ret void
+}
+
+define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) {
+; CHECK-LABEL: noaliasaddrproducer
+ %1 = load %S, %S* %src
+ store %S undef, %S* %src
+ %2 = load i32, i32* %dstidptr
+ %dstindex = or i32 %2, 1
+ %dst2 = getelementptr %S , %S* %dst, i32 %dstindex
+ store %S %1, %S* %dst2
+ ret void
+}
OpenPOWER on IntegriCloud