summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author Daniel Neilson <dneilson@azul.com> 2017-06-16 14:43:59 +0000
committer Daniel Neilson <dneilson@azul.com> 2017-06-16 14:43:59 +0000
commit 3faabbbe85d51f5efb1f3f540fe7b81916144a2e (patch)
tree d9e634f1be996698481053dd5b98d46045d33be1 /llvm/lib
parent 5852c4c1089126316fe92ce4cbea43640c70e7ed (diff)
download bcm5719-llvm-3faabbbe85d51f5efb1f3f540fe7b81916144a2e.tar.gz
bcm5719-llvm-3faabbbe85d51f5efb1f3f540fe7b81916144a2e.zip
[Atomics] Rename and change prototype for atomic memcpy intrinsic
Summary: Background: http://lists.llvm.org/pipermail/llvm-dev/2017-May/112779.html This change is to alter the prototype for the atomic memcpy intrinsic. The prototype itself is being changed to more closely resemble the semantics and parameters of the llvm.memcpy intrinsic -- to ease later combination of the llvm.memcpy and atomic memcpy intrinsics. Furthermore, the name of the atomic memcpy intrinsic is being changed to make it clear that it is not a generic atomic memcpy, but specifically a memcpy that is unordered atomic. Reviewers: reames, sanjoy, efriedma Reviewed By: reames Subscribers: mzolotukhin, anna, llvm-commits, skatkov Differential Revision: https://reviews.llvm.org/D33240 llvm-svn: 305558
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 24
-rw-r--r-- llvm/lib/CodeGen/TargetLoweringBase.cpp 28
-rw-r--r-- llvm/lib/IR/IRBuilder.cpp 15
-rw-r--r-- llvm/lib/IR/Verifier.cpp 31
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 121
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineInternal.h 3
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 24
7 files changed, 130 insertions, 116 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3d42990667e..f9f431db55b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4943,11 +4943,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
updateDAGForMaybeTailCall(MM);
return nullptr;
}
- case Intrinsic::memcpy_element_atomic: {
- SDValue Dst = getValue(I.getArgOperand(0));
- SDValue Src = getValue(I.getArgOperand(1));
- SDValue NumElements = getValue(I.getArgOperand(2));
- SDValue ElementSize = getValue(I.getArgOperand(3));
+ case Intrinsic::memcpy_element_unordered_atomic: {
+ const ElementUnorderedAtomicMemCpyInst &MI =
+ cast<ElementUnorderedAtomicMemCpyInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
// Emit a library call.
TargetLowering::ArgListTy Args;
@@ -4959,18 +4960,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Entry.Node = Src;
Args.push_back(Entry);
- Entry.Ty = I.getArgOperand(2)->getType();
- Entry.Node = NumElements;
+ Entry.Ty = MI.getLength()->getType();
+ Entry.Node = Length;
Args.push_back(Entry);
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.Node = ElementSize;
- Args.push_back(Entry);
-
- uint64_t ElementSizeConstant =
- cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
RTLIB::Libcall LibraryCall =
- RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant);
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported element size");
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 581cfaf6075..e9d38c10c86 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -374,11 +374,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::MEMCPY] = "memcpy";
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] =
+ "__llvm_memcpy_element_unordered_atomic_1";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] =
+ "__llvm_memcpy_element_unordered_atomic_2";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] =
+ "__llvm_memcpy_element_unordered_atomic_4";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] =
+ "__llvm_memcpy_element_unordered_atomic_8";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] =
+ "__llvm_memcpy_element_unordered_atomic_16";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -781,22 +786,21 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
return UNKNOWN_LIBCALL;
}
-RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) {
+RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
switch (ElementSize) {
case 1:
- return MEMCPY_ELEMENT_ATOMIC_1;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
case 2:
- return MEMCPY_ELEMENT_ATOMIC_2;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
case 4:
- return MEMCPY_ELEMENT_ATOMIC_4;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
case 8:
- return MEMCPY_ELEMENT_ATOMIC_8;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
case 16:
- return MEMCPY_ELEMENT_ATOMIC_16;
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
default:
return UNKNOWN_LIBCALL;
}
-
}
/// InitCmpLibcallCCs - Set default comparison libcall CC.
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 81b02946e1d..b7fa07c6ffa 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -134,18 +134,17 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
return CI;
}
-CallInst *IRBuilderBase::CreateElementAtomicMemCpy(
- Value *Dst, Value *Src, Value *NumElements, uint32_t ElementSize,
- MDNode *TBAATag, MDNode *TBAAStructTag, MDNode *ScopeTag,
- MDNode *NoAliasTag) {
+CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
+ Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, MDNode *TBAATag,
+ MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
- Value *Ops[] = {Dst, Src, NumElements, getInt32(ElementSize)};
- Type *Tys[] = {Dst->getType(), Src->getType()};
+ Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
+ Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
- Value *TheFn =
- Intrinsic::getDeclaration(M, Intrinsic::memcpy_element_atomic, Tys);
+ Value *TheFn = Intrinsic::getDeclaration(
+ M, Intrinsic::memcpy_element_unordered_atomic, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index cf54cc3d6ae..819f63520c7 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4012,10 +4012,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
CS);
break;
}
- case Intrinsic::memcpy_element_atomic: {
- ConstantInt *ElementSizeCI = dyn_cast<ConstantInt>(CS.getArgOperand(3));
- Assert(ElementSizeCI, "element size of the element-wise atomic memory "
- "intrinsic must be a constant int",
+ case Intrinsic::memcpy_element_unordered_atomic: {
+ const ElementUnorderedAtomicMemCpyInst *MI =
+ cast<ElementUnorderedAtomicMemCpyInst>(CS.getInstruction());
+ ;
+
+ ConstantInt *ElementSizeCI =
+ dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
+ Assert(ElementSizeCI,
+ "element size of the element-wise unordered atomic memory "
+ "intrinsic must be a constant int",
CS);
const APInt &ElementSizeVal = ElementSizeCI->getValue();
Assert(ElementSizeVal.isPowerOf2(),
@@ -4023,19 +4029,24 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"must be a power of 2",
CS);
+ if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
+ uint64_t Length = LengthCI->getZExtValue();
+ uint64_t ElementSize = MI->getElementSizeInBytes();
+ Assert((Length % ElementSize) == 0,
+ "constant length must be a multiple of the element size in the "
+ "element-wise atomic memory intrinsic",
+ CS);
+ }
+
auto IsValidAlignment = [&](uint64_t Alignment) {
return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
};
-
uint64_t DstAlignment = CS.getParamAlignment(0),
SrcAlignment = CS.getParamAlignment(1);
-
Assert(IsValidAlignment(DstAlignment),
- "incorrect alignment of the destination argument",
- CS);
+ "incorrect alignment of the destination argument", CS);
Assert(IsValidAlignment(SrcAlignment),
- "incorrect alignment of the source argument",
- CS);
+ "incorrect alignment of the source argument", CS);
break;
}
case Intrinsic::gcroot:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d29ed49eca0..c0830a5d211 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -94,75 +94,80 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
return ConstantVector::get(BoolVec);
}
-Instruction *
-InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) {
+Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy(
+ ElementUnorderedAtomicMemCpyInst *AMI) {
// Try to unfold this intrinsic into sequence of explicit atomic loads and
// stores.
// First check that number of elements is compile time constant.
- auto *NumElementsCI = dyn_cast<ConstantInt>(AMI->getNumElements());
- if (!NumElementsCI)
+ auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength());
+ if (!LengthCI)
return nullptr;
// Check that there are not too many elements.
- uint64_t NumElements = NumElementsCI->getZExtValue();
+ uint64_t LengthInBytes = LengthCI->getZExtValue();
+ uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes();
+ uint64_t NumElements = LengthInBytes / ElementSizeInBytes;
if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
return nullptr;
- // Don't unfold into illegal integers
- uint64_t ElementSizeInBytes = AMI->getElementSizeInBytes() * 8;
- if (!getDataLayout().isLegalInteger(ElementSizeInBytes))
- return nullptr;
+ // Only expand if there are elements to copy.
+ if (NumElements > 0) {
+ // Don't unfold into illegal integers
+ uint64_t ElementSizeInBits = ElementSizeInBytes * 8;
+ if (!getDataLayout().isLegalInteger(ElementSizeInBits))
+ return nullptr;
- // Cast source and destination to the correct type. Intrinsic input arguments
- // are usually represented as i8*.
- // Often operands will be explicitly casted to i8* and we can just strip
- // those casts instead of inserting new ones. However it's easier to rely on
- // other InstCombine rules which will cover trivial cases anyway.
- Value *Src = AMI->getRawSource();
- Value *Dst = AMI->getRawDest();
- Type *ElementPointerType = Type::getIntNPtrTy(
- AMI->getContext(), ElementSizeInBytes, Src->getType()->getPointerAddressSpace());
-
- Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
- "memcpy_unfold.src_casted");
- Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
- "memcpy_unfold.dst_casted");
-
- for (uint64_t i = 0; i < NumElements; ++i) {
- // Get current element addresses
- ConstantInt *ElementIdxCI =
- ConstantInt::get(AMI->getContext(), APInt(64, i));
- Value *SrcElementAddr =
- Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
- Value *DstElementAddr =
- Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
-
- // Load from the source. Transfer alignment information and mark load as
- // unordered atomic.
- LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
- Load->setOrdering(AtomicOrdering::Unordered);
- // We know alignment of the first element. It is also guaranteed by the
- // verifier that element size is less or equal than first element alignment
- // and both of this values are powers of two.
- // This means that all subsequent accesses are at least element size
- // aligned.
- // TODO: We can infer better alignment but there is no evidence that this
- // will matter.
- Load->setAlignment(i == 0 ? AMI->getSrcAlignment()
- : AMI->getElementSizeInBytes());
- Load->setDebugLoc(AMI->getDebugLoc());
-
- // Store loaded value via unordered atomic store.
- StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
- Store->setOrdering(AtomicOrdering::Unordered);
- Store->setAlignment(i == 0 ? AMI->getDstAlignment()
- : AMI->getElementSizeInBytes());
- Store->setDebugLoc(AMI->getDebugLoc());
+ // Cast source and destination to the correct type. Intrinsic input
+ // arguments are usually represented as i8*. Often operands will be
+ // explicitly casted to i8* and we can just strip those casts instead of
+ // inserting new ones. However it's easier to rely on other InstCombine
+ // rules which will cover trivial cases anyway.
+ Value *Src = AMI->getRawSource();
+ Value *Dst = AMI->getRawDest();
+ Type *ElementPointerType =
+ Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
+ Src->getType()->getPointerAddressSpace());
+
+ Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
+ "memcpy_unfold.src_casted");
+ Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
+ "memcpy_unfold.dst_casted");
+
+ for (uint64_t i = 0; i < NumElements; ++i) {
+ // Get current element addresses
+ ConstantInt *ElementIdxCI =
+ ConstantInt::get(AMI->getContext(), APInt(64, i));
+ Value *SrcElementAddr =
+ Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
+ Value *DstElementAddr =
+ Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
+
+ // Load from the source. Transfer alignment information and mark load as
+ // unordered atomic.
+ LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
+ Load->setOrdering(AtomicOrdering::Unordered);
+ // We know alignment of the first element. It is also guaranteed by the
+ // verifier that element size is less or equal than first element
+ // alignment and both of this values are powers of two. This means that
+ // all subsequent accesses are at least element size aligned.
+ // TODO: We can infer better alignment but there is no evidence that this
+ // will matter.
+ Load->setAlignment(i == 0 ? AMI->getParamAlignment(1)
+ : ElementSizeInBytes);
+ Load->setDebugLoc(AMI->getDebugLoc());
+
+ // Store loaded value via unordered atomic store.
+ StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
+ Store->setOrdering(AtomicOrdering::Unordered);
+ Store->setAlignment(i == 0 ? AMI->getParamAlignment(0)
+ : ElementSizeInBytes);
+ Store->setDebugLoc(AMI->getDebugLoc());
+ }
}
// Set the number of elements of the copy to 0, it will be deleted on the
// next iteration.
- AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType()));
+ AMI->setLength(Constant::getNullValue(LengthCI->getType()));
return AMI;
}
@@ -1888,12 +1893,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
- if (auto *AMI = dyn_cast<ElementAtomicMemCpyInst>(II)) {
- if (Constant *C = dyn_cast<Constant>(AMI->getNumElements()))
+ if (auto *AMI = dyn_cast<ElementUnorderedAtomicMemCpyInst>(II)) {
+ if (Constant *C = dyn_cast<Constant>(AMI->getLength()))
if (C->isNullValue())
return eraseInstFromFunction(*AMI);
- if (Instruction *I = SimplifyElementAtomicMemCpy(AMI))
+ if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI))
return I;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8c5aeb2a0cb..1a7db146df4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -726,7 +726,8 @@ private:
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
- Instruction *SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI);
+ Instruction *
+ SimplifyElementUnorderedAtomicMemCpy(ElementUnorderedAtomicMemCpyInst *AMI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
Instruction *SimplifyMemSet(MemSetInst *MI);
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b706152f30c..8b435050ac7 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -983,21 +983,21 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *NumBytesS =
SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must neccessarily be unordered
// by previous checks.
- if (!SI->isAtomic() && !LI->isAtomic()) {
- if (StoreSize != 1)
- NumBytesS = SE->getMulExpr(
- NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW);
-
- Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
-
+ if (!SI->isAtomic() && !LI->isAtomic())
NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
- } else {
+ else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
if (Align < StoreSize)
@@ -1010,11 +1010,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
return false;
- Value *NumElements =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
+ StoreBasePtr, LoadBasePtr, NumBytes, StoreSize);
- NewCall = Builder.CreateElementAtomicMemCpy(StoreBasePtr, LoadBasePtr,
- NumElements, StoreSize);
// Propagate alignment info onto the pointer args. Note that unordered
// atomic loads/stores are *required* by the spec to have an alignment
// but non-atomic loads/stores may not.
OpenPOWER on IntegriCloud