Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Analysis/TargetTransformInfo.cpp        12
-rw-r--r--   llvm/lib/CodeGen/BasicTargetTransformInfo.cpp      4
-rw-r--r--   llvm/lib/Target/X86/X86TargetTransformInfo.cpp    57
3 files changed, 73 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index e3317851714..9184842052b 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -221,6 +221,13 @@ unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
   return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
 }
 
+unsigned
+TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+                                           unsigned Alignment,
+                                           unsigned AddressSpace) const {
+  return PrevTTI->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
 unsigned
 TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
                                            Type *RetTy,
@@ -623,6 +630,11 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
     return 1;
   }
 
+  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                                 unsigned AddressSpace) const override {
+    return 1;
+  }
+
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef<Type*> Tys) const override {
     return 1;
diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index 72da80646c5..4e962b469fb 100644
--- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -582,6 +582,10 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   case Intrinsic::lifetime_start:
   case Intrinsic::lifetime_end:
     return 0;
+  case Intrinsic::masked_store:
+    return TopTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
+  case Intrinsic::masked_load:
+    return TopTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
   }
 
   const TargetLoweringBase *TLI = getTLI();
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 67488f7ad79..9d7f1238fff 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -96,6 +96,9 @@ public:
                            unsigned Index) const override;
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const override;
+  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+                                 unsigned Alignment,
+                                 unsigned AddressSpace) const override;
 
   unsigned getAddressComputationCost(Type *PtrTy,
                                      bool IsComplex) const override;
@@ -917,6 +920,60 @@ unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
   return Cost;
 }
 
+unsigned X86TTI::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
+                                       unsigned Alignment,
+                                       unsigned AddressSpace) const {
+  VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
+  if (!SrcVTy)
+    // To calculate scalar take the regular cost, without mask
+    return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
+
+  unsigned NumElem = SrcVTy->getVectorNumElements();
+  VectorType *MaskTy =
+      VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem);
+  if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) ||
+      (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) ||
+      !isPowerOf2_32(NumElem)) {
+    // Scalarization
+    unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
+    unsigned ScalarCompareCost =
+        getCmpSelInstrCost(Instruction::ICmp,
+                           Type::getInt8Ty(getGlobalContext()), NULL);
+    unsigned BranchCost = getCFInstrCost(Instruction::Br);
+    unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
+
+    unsigned ValueSplitCost =
+        getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
+                                 Opcode == Instruction::Store);
+    unsigned MemopCost = NumElem *
+        TargetTransformInfo::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+                                             Alignment, AddressSpace);
+    return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
+  }
+
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
+  unsigned Cost = 0;
+  if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
+      LT.second.getVectorNumElements() == NumElem)
+    // Promotion requires expand/truncate for data and a shuffle for mask.
+    Cost += getShuffleCost(TargetTransformInfo::SK_Alternate, SrcVTy, 0, 0) +
+            getShuffleCost(TargetTransformInfo::SK_Alternate, MaskTy, 0, 0);
+
+  else if (LT.second.getVectorNumElements() > NumElem) {
+    VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
+                                            LT.second.getVectorNumElements());
+    // Expanding requires fill mask with zeroes
+    Cost += getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
+                           NewMaskTy, 0, MaskTy);
+  }
+  if (!ST->hasAVX512())
+    return Cost + LT.first*4; // Each maskmov costs 4
+
+  // AVX-512 masked load/store is cheaper
+  return Cost + LT.first;
+}
+
 unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
   // Address computations in vectorized code with non-consecutive addresses will
   // likely result in more instructions compared to scalar code where the
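For context, the hook added by this patch is intended to be queried by clients such as the loop vectorizer when costing llvm.masked.load / llvm.masked.store. The sketch below is not part of the patch: the helper maskedStoreIsProfitable and its scalar baseline are illustrative assumptions, and only the two cost queries come from the TargetTransformInfo interface shown above.

// Minimal sketch (era-appropriate LLVM APIs) of a client comparing the cost
// of a masked vector store against a naively scalarized store sequence.
// The helper name and the profitability rule are assumptions for illustration.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static bool maskedStoreIsProfitable(const TargetTransformInfo &TTI,
                                    VectorType *VecTy, unsigned Alignment,
                                    unsigned AddressSpace) {
  // Cost of keeping the access as a single masked vector store.
  unsigned MaskedCost = TTI.getMaskedMemoryOpCost(Instruction::Store, VecTy,
                                                  Alignment, AddressSpace);

  // Rough scalar baseline: one scalar store per element; extract/branch
  // overhead is deliberately ignored in this sketch.
  unsigned NumElts = VecTy->getVectorNumElements();
  unsigned ScalarCost =
      NumElts * TTI.getMemoryOpCost(Instruction::Store, VecTy->getScalarType(),
                                    Alignment, AddressSpace);

  return MaskedCost <= ScalarCost;
}

With the X86 implementation above, such a query returns roughly 4 * LT.first for AVX2-style maskmov targets and LT.first on AVX-512, while illegal or non-power-of-two vector types fall back to the scalarization estimate.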