diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInline.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 35 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 3 |
3 files changed, 37 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index f4df20b8f03..a83ec23ec05 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -51,7 +51,7 @@ ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
 
 // Inliner constraint to achieve reasonable compilation time
 static cl::opt<size_t>
-MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
+MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100),
       cl::desc("Maximum BB number allowed in a function after inlining"
                " (compile time constraint)"));
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b36580cdf2f..616196ad5ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -57,7 +57,7 @@ using namespace llvm;
 static cl::opt<unsigned> UnrollThresholdPrivate(
   "amdgpu-unroll-threshold-private",
   cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
-  cl::init(2500), cl::Hidden);
+  cl::init(2000), cl::Hidden);
 
 static cl::opt<unsigned> UnrollThresholdLocal(
   "amdgpu-unroll-threshold-local",
@@ -693,6 +693,39 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   CommonTTI.getUnrollingPreferences(L, SE, UP);
 }
 
+unsigned GCNTTIImpl::getUserCost(const User *U,
+                                 ArrayRef<const Value *> Operands) {
+  // Estimate extractelement elimination
+  if (const ExtractElementInst *EE = dyn_cast<ExtractElementInst>(U)) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(EE->getOperand(1));
+    unsigned Idx = -1;
+    if (CI)
+      Idx = CI->getZExtValue();
+    return getVectorInstrCost(EE->getOpcode(), EE->getOperand(0)->getType(),
+                              Idx);
+  }
+
+  // Estimate insertelement elimination
+  if (const InsertElementInst *IE = dyn_cast<InsertElementInst>(U)) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+    unsigned Idx = -1;
+    if (CI)
+      Idx = CI->getZExtValue();
+    return getVectorInstrCost(IE->getOpcode(), IE->getType(), Idx);
+  }
+
+  // Estimate different intrinsics, e.g. llvm.fabs
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+    SmallVector<Value *, 4> Args(II->arg_operands());
+    FastMathFlags FMF;
+    if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+      FMF = FPMO->getFastMathFlags();
+    return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
+                                 FMF);
+  }
+  return BaseT::getUserCost(U, Operands);
+}
+
 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
   return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index c7cd2f2c7bf..67f7f9074f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -204,7 +204,7 @@ public:
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
-  unsigned getInliningThresholdMultiplier() { return 7; }
+  unsigned getInliningThresholdMultiplier() { return 9; }
 
   int getInlinerVectorBonusPercent() { return 0; }
 
@@ -214,6 +214,7 @@ public:
   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                              bool IsPairwiseForm,
                              bool IsUnsigned);
+  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
 };
 
 class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {

