summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author Nikita Popov <nikita.ppv@gmail.com> 2018-12-07 21:16:58 +0000
committer Nikita Popov <nikita.ppv@gmail.com> 2018-12-07 21:16:58 +0000
commit 94b8e2ea4ec9246434181e152558cbc2c1c3c7d8 (patch)
tree b217ad259cdcfc0dece662c7c9fa4dbe06baf8ca /llvm/lib
parent 4ca00df57189d95b282cfc6296a51bc1058e670a (diff)
download bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.tar.gz
bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.zip
[MemCpyOpt] memset->memcpy forwarding with undef tail
Currently memcpyopt optimizes cases like memset(a, byte, N); memcpy(b, a, M); to memset(a, byte, N); memset(b, byte, M); if M <= N. Often this allows further simplifications down the line, which drop the first memset entirely. This patch extends this optimization to the case where M > N, but we know that the bytes a[N..M] are undef due to alloca/lifetime.start. This situation arises relatively often for Rust code, because Rust does not initialize trailing structure padding and loves to insert redundant memcpys. This also fixes https://bugs.llvm.org/show_bug.cgi?id=39844. For the implementation, I'm reusing a bit of code from a similar existing optimization (direct memcpy of undef). I've also added memset support to MemDepAnalysis GetLocation. getPointerDependencyFrom could be used instead, but it seems to make more sense to add this to GetLocation and thus make the computation cacheable. Differential Revision: https://reviews.llvm.org/D55120 llvm-svn: 348645
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp 6
-rw-r--r-- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp 46
2 files changed, 36 insertions, 16 deletions
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2fe012d3865..090755960c8 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -154,6 +154,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
return ModRefInfo::Mod;
}
+ if (const MemSetInst *MI = dyn_cast<MemSetInst>(Inst)) {
+ Loc = MemoryLocation::getForDest(MI);
+ // Conservatively assume ModRef for volatile memset.
+ return MI->isVolatile() ? ModRefInfo::ModRef : ModRefInfo::Mod;
+ }
+
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4e82e2bd42c..fa44cd9e7df 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1144,6 +1144,21 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return true;
}
+/// Determine whether the instruction has undefined content for the given Size,
+/// either because it was freshly alloca'd or started its lifetime.
+static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+ if (isa<AllocaInst>(I))
+ return true;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ if (LTSize->getZExtValue() >= Size->getZExtValue())
+ return true;
+
+ return false;
+}
+
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
@@ -1167,12 +1182,23 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
- ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ // A known memset size is required.
ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
+ if (!MemSetSize)
+ return false;
+
// Make sure the memcpy doesn't read any more than what the memset wrote.
// Don't worry about sizes larger than i64.
- if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
- return false;
+ ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
+ // If the memcpy is larger than the memset, but the memory was undef prior
+ // to the memset, we can just ignore the tail.
+ MemDepResult DepInfo = MD->getDependency(MemSet);
+ if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ CopySize = MemSetSize;
+ else
+ return false;
+ }
IRBuilder<> Builder(MemCpy);
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
@@ -1252,19 +1278,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
} else if (SrcDepInfo.isDef()) {
- Instruction *I = SrcDepInfo.getInst();
- bool hasUndefContents = false;
-
- if (isa<AllocaInst>(I)) {
- hasUndefContents = true;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- if (LTSize->getZExtValue() >= CopySize->getZExtValue())
- hasUndefContents = true;
- }
-
- if (hasUndefContents) {
+ if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
MD->removeInstruction(M);
M->eraseFromParent();
++NumMemCpyInstr;
OpenPOWER on IntegriCloud