diff options
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 22 | ||||
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 4 | ||||
| -rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 105 | ||||
| -rw-r--r-- | llvm/lib/Analysis/CostModel.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 17 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/vminmax.ll | 17 |
10 files changed, 151 insertions, 74 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 13e0729604f..735b022e309 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -25,6 +25,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" @@ -518,11 +519,11 @@ public: /// \returns The cost of Intrinsic instructions. Types analysis only. int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) const; + ArrayRef<Type *> Tys, FastMathFlags FMF) const; /// \returns The cost of Intrinsic instructions. Analyses the real arguments. int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) const; + ArrayRef<Value *> Args, FastMathFlags FMF) const; /// \returns The cost of Call instructions. int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const; @@ -664,9 +665,11 @@ public: virtual int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) = 0; + ArrayRef<Type *> Tys, + FastMathFlags FMF) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) = 0; + ArrayRef<Value *> Args, + FastMathFlags FMF) = 0; virtual int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; @@ -861,13 +864,14 @@ public: bool IsPairwiseForm) override { return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); } - int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) override { - return Impl.getIntrinsicInstrCost(ID, RetTy, Tys); + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, + FastMathFlags FMF) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF); } int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) override { - return Impl.getIntrinsicInstrCost(ID, RetTy, Args); + ArrayRef<Value *> Args, + FastMathFlags FMF) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF); } int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 39eba4d6e58..487de0892ff 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -324,11 +324,11 @@ public: } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) { + ArrayRef<Type *> Tys, FastMathFlags FMF) { return 1; } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) { + ArrayRef<Value *> Args, FastMathFlags FMF) { return 1; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 476f0d5fb53..cfe6a25f915 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -587,13 +587,14 @@ public: /// Get intrinsic cost based on arguments unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Value *> Args) { + ArrayRef<Value *> Args, FastMathFlags FMF) { switch (IID) { default: { SmallVector<Type *, 4> Types; for (Value *Op : Args) Types.push_back(Op->getType()); - return static_cast<T *>(this)->getIntrinsicInstrCost(IID, RetTy, Types); + return static_cast<T *>(this)->getIntrinsicInstrCost(IID, RetTy, Types, + FMF); } case Intrinsic::masked_scatter: { Value *Mask = Args[3]; @@ -619,8 +620,8 @@ public: /// Get intrinsic cost based on argument types unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys) { - unsigned ISD = 0; + ArrayRef<Type *> Tys, FastMathFlags FMF) { + SmallVector<unsigned, 2> ISDs; unsigned SingleCallCost = 10; // Library call cost. Make it expensive. switch (IID) { default: { @@ -647,74 +648,78 @@ public: return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost( - IID, ScalarRetTy, ScalarTys); + IID, ScalarRetTy, ScalarTys, FMF); return ScalarCalls * ScalarCost + ScalarizationCost; } // Look for intrinsics that can be lowered directly or turned into a scalar // intrinsic call. case Intrinsic::sqrt: - ISD = ISD::FSQRT; + ISDs.push_back(ISD::FSQRT); break; case Intrinsic::sin: - ISD = ISD::FSIN; + ISDs.push_back(ISD::FSIN); break; case Intrinsic::cos: - ISD = ISD::FCOS; + ISDs.push_back(ISD::FCOS); break; case Intrinsic::exp: - ISD = ISD::FEXP; + ISDs.push_back(ISD::FEXP); break; case Intrinsic::exp2: - ISD = ISD::FEXP2; + ISDs.push_back(ISD::FEXP2); break; case Intrinsic::log: - ISD = ISD::FLOG; + ISDs.push_back(ISD::FLOG); break; case Intrinsic::log10: - ISD = ISD::FLOG10; + ISDs.push_back(ISD::FLOG10); break; case Intrinsic::log2: - ISD = ISD::FLOG2; + ISDs.push_back(ISD::FLOG2); break; case Intrinsic::fabs: - ISD = ISD::FABS; + ISDs.push_back(ISD::FABS); break; case Intrinsic::minnum: - ISD = ISD::FMINNUM; + ISDs.push_back(ISD::FMINNUM); + if (FMF.noNaNs()) + ISDs.push_back(ISD::FMINNAN); break; case Intrinsic::maxnum: - ISD = ISD::FMAXNUM; + ISDs.push_back(ISD::FMAXNUM); + if (FMF.noNaNs()) + ISDs.push_back(ISD::FMAXNAN); break; case Intrinsic::copysign: - ISD = ISD::FCOPYSIGN; + ISDs.push_back(ISD::FCOPYSIGN); break; case Intrinsic::floor: - ISD = ISD::FFLOOR; + ISDs.push_back(ISD::FFLOOR); break; case Intrinsic::ceil: - ISD = ISD::FCEIL; + ISDs.push_back(ISD::FCEIL); break; case Intrinsic::trunc: - ISD = ISD::FTRUNC; + ISDs.push_back(ISD::FTRUNC); break; case Intrinsic::nearbyint: - ISD = ISD::FNEARBYINT; + ISDs.push_back(ISD::FNEARBYINT); break; case Intrinsic::rint: - ISD = ISD::FRINT; + ISDs.push_back(ISD::FRINT); break; case Intrinsic::round: - ISD = ISD::FROUND; + ISDs.push_back(ISD::FROUND); break; case Intrinsic::pow: - ISD = ISD::FPOW; + ISDs.push_back(ISD::FPOW); break; case Intrinsic::fma: - ISD = ISD::FMA; + ISDs.push_back(ISD::FMA); break; case Intrinsic::fmuladd: - ISD = ISD::FMA; + ISDs.push_back(ISD::FMA); break; // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. case Intrinsic::lifetime_start: @@ -727,7 +732,7 @@ public: return static_cast<T *>(this) ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); case Intrinsic::ctpop: - ISD = ISD::CTPOP; + ISDs.push_back(ISD::CTPOP); // In case of legalization use TCC_Expensive. This is cheaper than a // library call but still not a cheap instruction. SingleCallCost = TargetTransformInfo::TCC_Expensive; @@ -738,26 +743,36 @@ public: const TargetLoweringBase *TLI = getTLI(); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - if (IID == Intrinsic::fabs && - TLI->isFAbsFree(LT.second)) { - return 0; - } + SmallVector<unsigned, 2> LegalCost; + SmallVector<unsigned, 2> CustomCost; + for (unsigned ISD : ISDs) { + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) { + return 0; + } - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that there is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + LegalCost.push_back(LT.first * 2); + else + LegalCost.push_back(LT.first * 1); + } else if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // that the code is twice as expensive. + CustomCost.push_back(LT.first * 2); + } } - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2; - } + auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); + if (MinLegalCostI != LegalCost.end()) + return *MinLegalCostI; + + auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end()); + if (MinCustomCostI != CustomCost.end()) + return *MinCustomCostI; // If we can't lower fmuladd into an FMA estimate the cost as a floating // point mul followed by an add. @@ -781,7 +796,7 @@ public: ScalarTys.push_back(Ty); } unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost( - IID, RetTy->getScalarType(), ScalarTys); + IID, RetTy->getScalarType(), ScalarTys, FMF); for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { if (Tys[i]->isVectorTy()) { ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp index 0383cbfbbe4..36a1db664e1 100644 --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -504,8 +504,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) Args.push_back(II->getArgOperand(J)); + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(II)) + FMF = FPMO->getFastMathFlags(); + return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), - Args); + Args, FMF); } return -1; default: diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index b64d4133420..48e441bac69 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -315,15 +315,17 @@ int TargetTransformInfo::getInterleavedMemoryOpCost( } int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) const { - int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); + ArrayRef<Type *> Tys, + FastMathFlags FMF) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) const { - int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args); + ArrayRef<Value *> Args, + FastMathFlags FMF) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3a4a3301377..ed193198667 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5143,18 +5143,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } - case Intrinsic::minnum: - setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + case Intrinsic::minnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT) + ? ISD::FMINNAN + : ISD::FMINNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; - case Intrinsic::maxnum: - setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + } + case Intrinsic::maxnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT) + ? ISD::FMAXNAN + : ISD::FMAXNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; + } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp index 5d3af4590d8..b43951d3f40 100644 --- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1117,16 +1117,25 @@ namespace { } if (IID && TTI) { + FastMathFlags FMFCI; + if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI)) + FMFCI = FPMOCI->getFastMathFlags(); + SmallVector<Type*, 4> Tys; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) Tys.push_back(CI->getArgOperand(i)->getType()); - unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys); + unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI); Tys.clear(); CallInst *CJ = cast<CallInst>(J); + + FastMathFlags FMFCJ; + if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ)) + FMFCJ = FPMOCJ->getFastMathFlags(); + for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i) Tys.push_back(CJ->getArgOperand(i)->getType()); - unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys); + unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ); Tys.clear(); assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && @@ -1140,8 +1149,10 @@ namespace { CJ->getArgOperand(i)->getType())); } + FastMathFlags FMFV = FMFCI; + FMFV &= FMFCJ; Type *RetTy = getVecTypeForPair(IT1, JT1); - unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys); + unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV); if (VCost > ICost + JCost) return false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 72e96cdf46d..2c5fec64c18 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3302,7 +3302,11 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF, for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); - return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(CI)) + FMF = FPMO->getFastMathFlags(); + + return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF); } static Type *smallestIntegerVectorType(Type *T1, Type *T2) { @@ -4269,7 +4273,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { } } assert(VectorF && "Can't create vector function."); - Entry[Part] = Builder.CreateCall(VectorF, Args); + + CallInst *V = Builder.CreateCall(VectorF, Args); + + if (isa<FPMathOperator>(V)) + V->copyFastMathFlags(CI); + + Entry[Part] = V; } addMetadata(Entry, &*it); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0254ef47225..fc060f19eb6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1659,10 +1659,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { VecTy->getNumElements())); } + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(CI)) + FMF = FPMO->getFastMathFlags(); + int ScalarCallCost = VecTy->getNumElements() * - TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys); + TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF); - int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys); + int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF); DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost << " (" << VecCallCost << "-" << ScalarCallCost << ")" diff --git a/llvm/test/CodeGen/ARM/vminmax.ll b/llvm/test/CodeGen/ARM/vminmax.ll index 011bfd7ff88..e4f30faa917 100644 --- a/llvm/test/CodeGen/ARM/vminmax.ll +++ b/llvm/test/CodeGen/ARM/vminmax.ll @@ -291,3 +291,20 @@ declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind read declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare float @llvm.maxnum.f32(float %a, float %b) +declare float @llvm.minnum.f32(float %a, float %b) + +define float @maxnum(float %a, float %b) { +;CHECK-LABEL: maxnum: +;CHECK: vmax.f32 + %r = call nnan float @llvm.maxnum.f32(float %a, float %b) + ret float %r +} + +define float @minnum(float %a, float %b) { +;CHECK-LABEL: minnum: +;CHECK: vmin.f32 + %r = call nnan float @llvm.minnum.f32(float %a, float %b) + ret float %r +} |

