diff options
| author | Igor Laevsky <igmyrj@gmail.com> | 2017-02-08 14:32:04 +0000 |
|---|---|---|
| committer | Igor Laevsky <igmyrj@gmail.com> | 2017-02-08 14:32:04 +0000 |
| commit | 900ffa34c86fbda8909c3550c693f62856325400 (patch) | |
| tree | 713f07f3692cbeb1b4d148aab5d145aa47d1432d /llvm/lib/Transforms | |
| parent | 4b317fa24e6cf645852c9bf69a6817bb5deeb1c9 (diff) | |
| download | bcm5719-llvm-900ffa34c86fbda8909c3550c693f62856325400.tar.gz bcm5719-llvm-900ffa34c86fbda8909c3550c693f62856325400.zip | |
[InstCombineCalls] Unfold element atomic memcpy instruction
Differential Revision: https://reviews.llvm.org/D28909
llvm-svn: 294453
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 81 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 2 |
2 files changed, 83 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 13f2f9e9c72..23f8035e2aa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -60,6 +60,12 @@ using namespace PatternMatch;
 
 STATISTIC(NumSimplified, "Number of library calls simplified");
 
+static cl::opt<uint64_t> UnfoldElementAtomicMemcpyMaxElements(
+    "unfold-element-atomic-memcpy-max-elements",
+    cl::init(16),
+    cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
+             "allowed to unfold"));
+
 /// Return the specified type promoted as it would be to pass though a va_arg
 /// area.
 static Type *getPromotedType(Type *Ty) {
@@ -108,6 +114,78 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
   return ConstantVector::get(BoolVec);
 }
 
+/// Unfold an element-wise atomic memcpy that copies a small, constant number
+/// of elements into explicit unordered atomic loads and stores. Returns \p AMI
+/// with its element count zeroed on success, nullptr if nothing was changed.
+Instruction *
+InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) {
+  // The number of elements has to be a compile time constant.
+  auto *NumElementsCI = dyn_cast<ConstantInt>(AMI->getNumElements());
+  if (!NumElementsCI)
+    return nullptr;
+
+  // Check that there are not too many elements; the limit itself is excluded.
+  uint64_t NumElements = NumElementsCI->getZExtValue();
+  if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
+    return nullptr;
+
+  // Don't unfold into illegal integers.
+  uint64_t ElementSizeInBits = AMI->getElementSizeInBytes() * 8;
+  if (!getDataLayout().isLegalInteger(ElementSizeInBits))
+    return nullptr;
+
+  // Cast source and destination to element pointers (the intrinsic arguments
+  // are usually i8*). Each pointer type keeps the address space of its own
+  // operand so that neither cast degrades into an addrspacecast. Redundant
+  // casts are left for other InstCombine rules to clean up.
+  Value *Src = AMI->getRawSource();
+  Value *Dst = AMI->getRawDest();
+  Type *SrcPtrTy = Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
+                                      Src->getType()->getPointerAddressSpace());
+  Type *DstPtrTy = Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
+                                      Dst->getType()->getPointerAddressSpace());
+  Value *SrcCasted =
+      Builder->CreatePointerCast(Src, SrcPtrTy, "memcpy_unfold.src_casted");
+  Value *DstCasted =
+      Builder->CreatePointerCast(Dst, DstPtrTy, "memcpy_unfold.dst_casted");
+
+  for (uint64_t i = 0; i < NumElements; ++i) {
+    // Get current element addresses.
+    ConstantInt *ElementIdxCI =
+        ConstantInt::get(AMI->getContext(), APInt(64, i));
+    Value *SrcElementAddr =
+        Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
+    Value *DstElementAddr =
+        Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
+
+    // Load from the source. Transfer alignment information and mark load as
+    // unordered atomic.
+    LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
+    Load->setOrdering(AtomicOrdering::Unordered);
+    // We know the alignment of the first element. The verifier guarantees
+    // that the element size is less than or equal to that alignment and that
+    // both values are powers of two, so all subsequent accesses are at least
+    // element size aligned.
+    // TODO: We can infer better alignment but there is no evidence that this
+    // will matter.
+    Load->setAlignment(i == 0 ? AMI->getSrcAlignment()
+                              : AMI->getElementSizeInBytes());
+    Load->setDebugLoc(AMI->getDebugLoc());
+
+    // Store loaded value via unordered atomic store.
+    StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
+    Store->setOrdering(AtomicOrdering::Unordered);
+    Store->setAlignment(i == 0 ? AMI->getDstAlignment()
+                               : AMI->getElementSizeInBytes());
+    Store->setDebugLoc(AMI->getDebugLoc());
+  }
+
+  // Set the number of elements of the copy to 0, it will be deleted on the
+  // next iteration.
+  AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType()));
+  return AMI;
+}
+
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
   unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
@@ -1839,6 +1917,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (Constant *C = dyn_cast<Constant>(AMI->getNumElements()))
       if (C->isNullValue())
         return eraseInstFromFunction(*AMI);
+
+    if (Instruction *I = SimplifyElementAtomicMemCpy(AMI))
+      return I;
   }
 
   if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9e04f5d7cb0..68bae0e5200 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -650,6 +650,8 @@ private:
   Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
   Instruction *MatchBSwap(BinaryOperator &I);
   bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+
+  Instruction *SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI);
   Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
   Instruction *SimplifyMemSet(MemSetInst *MI);
 

