diff options
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 17 | ||||
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 7 | ||||
| -rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 64 | ||||
| -rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 48 |
9 files changed, 92 insertions, 79 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 209f05c279d..89f0b22a3bf 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -411,6 +411,11 @@ public: /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) const; + /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; @@ -743,6 +748,10 @@ public: virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual unsigned + getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0; + virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; @@ -933,6 +942,14 @@ public: bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + return Impl.getScalarizationOverhead(Ty, Insert, Extract); + } + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) { + return Impl.getOperandsScalarizationOverhead(Args, VF); + } + bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index cafc40723c9..42cff56cd39 100644 --- 
a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -251,6 +251,13 @@ public: bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + return 0; + } + + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) { return 0; } + bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } bool enableInterleavedAccessVectorization() { return false; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7efdbcccdef..f4cd21a88f1 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -42,24 +42,6 @@ private: typedef TargetTransformInfoImplCRTPBase<T> BaseT; typedef TargetTransformInfo TTI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { - assert(Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += static_cast<T *>(this) - ->getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += static_cast<T *>(this) - ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; - } - /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. unsigned getPermuteShuffleOverhead(Type *Ty) { @@ -301,6 +283,37 @@ public: unsigned getRegisterBitWidth(bool Vector) { return 32; } + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. 
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + assert(Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += static_cast<T *>(this) + ->getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += static_cast<T *>(this) + ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; + } + + /// Estimate the overhead of scalarizing an instruction's unique operands. + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) { + unsigned Cost = 0; + SmallPtrSet<const Value*, 4> UniqueOperands; + for (const Value *A : Args) { + if (UniqueOperands.insert(A).second) + Cost += getScalarizationOverhead(VectorType::get(A->getType(), VF), + false, true); + } + return Cost; + } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned getArithmeticInstrCost( @@ -341,10 +354,17 @@ public: unsigned Num = Ty->getVectorNumElements(); unsigned Cost = static_cast<T *>(this) ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); - // return the cost of multiple scalar invocation plus the cost of - // inserting - // and extracting the values. - return getScalarizationOverhead(Ty, true, true) + Num * Cost; + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost; + if (!Args.empty()) + TotCost += getOperandsScalarizationOverhead(Args, Num); + else + // When no information on arguments is provided, we add the cost + // associated with one argument as a heuristic. + TotCost += getScalarizationOverhead(Ty, false, true); + + return TotCost; } // We don't know anything about this scalar instruction. 
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 5c0d1aac1b9..2727c0c4795 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -182,6 +182,17 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const return TTIImpl->shouldBuildLookupTablesForConstant(C); } +unsigned TargetTransformInfo:: +getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const { + return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); +} + +unsigned TargetTransformInfo:: +getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) const { + return TTIImpl->getOperandsScalarizationOverhead(Args, VF); +} + bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 18287ed6653..a7f7fb9457a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -34,10 +34,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { const AArch64Subtarget *ST; const AArch64TargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. 
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - const AArch64Subtarget *getST() const { return ST; } const AArch64TargetLowering *getTLI() const { return TLI; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 3c83cd92a61..5ee1f7c4b0e 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,10 +33,6 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const ARMSubtarget *ST; const ARMTargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index bd7fa9ca617..3ebfdd0ede7 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1577,20 +1577,6 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; } -int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { - assert (Ty->isVectorTy() && "Can only scalarize vectors"); - int Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; -} - int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { // Handle non-power-of-two vectors such as <3 x float> diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h 
b/llvm/lib/Target/X86/X86TargetTransformInfo.h index ecaaf951cff..63a1493002f 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -33,8 +33,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> { const X86Subtarget *ST; const X86TargetLowering *TLI; - int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6ffe29a5c98..ce3f3b0afef 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3598,37 +3598,18 @@ static Value *addFastMathFlag(Value *V) { return V; } -/// \brief Estimate the overhead of scalarizing a value based on its type. -/// Insert and Extract are set if the result needs to be inserted and/or -/// extracted from vectors. -static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract, - const TargetTransformInfo &TTI) { - if (Ty->isVoidTy()) - return 0; - - assert(Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) { - if (Extract) - Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I); - if (Insert) - Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I); - } - - return Cost; -} - /// \brief Estimate the overhead of scalarizing an Instruction based on the /// types of its operands and return value. 
static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys, Type *RetTy, const TargetTransformInfo &TTI) { - unsigned ScalarizationCost = - getScalarizationOverhead(RetTy, true, false, TTI); + unsigned ScalarizationCost = 0; + + if (!RetTy->isVoidTy()) + ScalarizationCost += TTI.getScalarizationOverhead(RetTy, true, false); for (Type *Ty : OpTys) - ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI); + ScalarizationCost += TTI.getScalarizationOverhead(Ty, false, true); return ScalarizationCost; } @@ -3640,14 +3621,15 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF, if (VF == 1) return 0; + unsigned Cost = 0; Type *RetTy = ToVectorTy(I->getType(), VF); + if (!RetTy->isVoidTy()) + Cost += TTI.getScalarizationOverhead(RetTy, true, false); - SmallVector<Type *, 4> OpTys; - unsigned OperandsNum = I->getNumOperands(); - for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd) - OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF)); + SmallVector<const Value *, 4> Operands(I->operand_values()); + Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); - return getScalarizationOverhead(OpTys, RetTy, TTI); + return Cost; } // Estimate cost of a call instruction CI if it were vectorized with factor VF. @@ -6713,8 +6695,8 @@ int LoopVectorizationCostModel::computePredInstDiscount( // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. 
if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) { - ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true, - false, TTI); + ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF), + true, false); ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI); } @@ -6729,8 +6711,8 @@ int LoopVectorizationCostModel::computePredInstDiscount( if (canBeScalarized(J)) Worklist.push_back(J); else if (needsExtract(J)) - ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF), - false, true, TTI); + ScalarCost += TTI.getScalarizationOverhead( + ToVectorTy(J->getType(),VF), false, true); } // Scale the total scalar cost by block probability. |

