diff options
| author | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-07 21:16:58 +0000 |
|---|---|---|
| committer | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-07 21:16:58 +0000 |
| commit | 94b8e2ea4ec9246434181e152558cbc2c1c3c7d8 (patch) | |
| tree | b217ad259cdcfc0dece662c7c9fa4dbe06baf8ca /llvm/lib | |
| parent | 4ca00df57189d95b282cfc6296a51bc1058e670a (diff) | |
| download | bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.tar.gz bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.zip | |
[MemCpyOpt] memset->memcpy forwarding with undef tail
Currently memcpyopt optimizes cases like
memset(a, byte, N);
memcpy(b, a, M);
to
memset(a, byte, N);
memset(b, byte, M);
if M <= N. Often this allows further simplifications down the line,
which drop the first memset entirely.
This patch extends this optimization for the case where M > N, but we
know that the bytes a[N..M] are undef due to alloca/lifetime.start.
This situation arises relatively often for Rust code, because Rust does
not initialize trailing structure padding and loves to insert redundant
memcpys. This also fixes https://bugs.llvm.org/show_bug.cgi?id=39844.
For the implementation, I'm reusing a bit of code for a similar existing
optimization (direct memcpy of undef). I've also added memset support to
MemDepAnalysis GetLocation -- Instead, getPointerDependencyFrom could be
used, but it seems to make more sense to add this to GetLocation and thus
make the computation cacheable.
Differential Revision: https://reviews.llvm.org/D55120
llvm-svn: 348645
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 46 |
2 files changed, 36 insertions, 16 deletions
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 2fe012d3865..090755960c8 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -154,6 +154,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, return ModRefInfo::Mod; } + if (const MemSetInst *MI = dyn_cast<MemSetInst>(Inst)) { + Loc = MemoryLocation::getForDest(MI); + // Conservatively assume ModRef for volatile memset. + return MI->isVolatile() ? ModRefInfo::ModRef : ModRefInfo::Mod; + } + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 4e82e2bd42c..fa44cd9e7df 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1144,6 +1144,21 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, return true; } +/// Determine whether the instruction has undefined content for the given Size, +/// either because it was freshly alloca'd or started its lifetime. +static bool hasUndefContents(Instruction *I, ConstantInt *Size) { + if (isa<AllocaInst>(I)) + return true; + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0))) + if (LTSize->getZExtValue() >= Size->getZExtValue()) + return true; + + return false; +} + /// Transform memcpy to memset when its source was just memset. 
/// In other words, turn: /// \code @@ -1167,12 +1182,23 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) return false; - ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength()); + // A known memset size is required. ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength()); + if (!MemSetSize) + return false; + // Make sure the memcpy doesn't read any more than what the memset wrote. // Don't worry about sizes larger than i64. - if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue()) - return false; + ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength()); + if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) { + // If the memcpy is larger than the memset, but the memory was undef prior + // to the memset, we can just ignore the tail. + MemDepResult DepInfo = MD->getDependency(MemSet); + if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) + CopySize = MemSetSize; + else + return false; + } IRBuilder<> Builder(MemCpy); Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), @@ -1252,19 +1278,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) return processMemCpyMemCpyDependence(M, MDep); } else if (SrcDepInfo.isDef()) { - Instruction *I = SrcDepInfo.getInst(); - bool hasUndefContents = false; - - if (isa<AllocaInst>(I)) { - hasUndefContents = true; - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start) - if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0))) - if (LTSize->getZExtValue() >= CopySize->getZExtValue()) - hasUndefContents = true; - } - - if (hasUndefContents) { + if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { MD->removeInstruction(M); M->eraseFromParent(); ++NumMemCpyInstr; |

