diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 62 |
1 files changed, 59 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index d6514735c87..72127bb0d6e 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -346,6 +346,7 @@ namespace {
                               uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
     bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
                                        uint64_t MSize);
+    bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
     bool processByValArgument(CallSite CS, unsigned ArgNo);
     Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                       Value *ByteVal);
@@ -839,6 +840,53 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
   return true;
 }
 
+/// We've found that the (upward scanning) memory dependence of \p MemCpy is
+/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
+/// weren't copied over by \p MemCpy.
+///
+/// In other words, transform:
+/// \code
+/// memset(dst, c, dst_size);
+/// memcpy(dst, src, src_size);
+/// \endcode
+/// into:
+/// \code
+/// memcpy(dst, src, src_size);
+/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
+/// \endcode
+bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
+                                              MemSetInst *MemSet) {
+  // We can only transform memset/memcpy with the same destination.
+  if (MemSet->getDest() != MemCpy->getDest())
+    return false;
+
+  Value *Dest = MemSet->getDest();
+  Value *DestSize = MemSet->getLength();
+  Value *SrcSize = MemCpy->getLength();
+
+  // By default, create an unaligned memset.
+  unsigned Align = 1;
+  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
+  // of the sum.
+  const unsigned DestAlign =
+      std::max(MemSet->getAlignment(), MemCpy->getAlignment());
+  if (DestAlign > 1)
+    if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
+      Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
+
+  IRBuilder<> Builder(MemCpy->getNextNode());
+
+  Value *MemsetLen =
+      Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
+                           ConstantInt::getNullValue(DestSize->getType()),
+                           Builder.CreateSub(DestSize, SrcSize));
+  Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
+                       MemsetLen, Align);
+
+  MD->removeInstruction(MemSet);
+  MemSet->eraseFromParent();
+  return true;
+}
 
 /// processMemCpy - perform simplification of memcpy's. If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
@@ -869,6 +917,17 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
         return true;
       }
 
+  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+                                                         M, M->getParent());
+
+  // Try to turn a partially redundant memset + memcpy into
+  // memcpy + smaller memset. We don't need the memcpy size for this.
+  if (SrcDepInfo.isClobber())
+    if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+      if (processMemSetMemCpyDependence(M, MDep))
+        return true;
+
   // The optimizations after this point require the memcpy size.
   ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
   if (!CopySize) return false;
@@ -892,9 +951,6 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
     }
   }
 
-  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
-  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
-                                                         M, M->getParent());
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());

