summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInline.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h3
3 files changed, 37 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index f4df20b8f03..a83ec23ec05 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -51,7 +51,7 @@ ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
// Inliner constraint to achieve reasonable compilation time
static cl::opt<size_t>
-MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
+MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100),
cl::desc("Maximum BB number allowed in a function after inlining"
" (compile time constraint)"));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b36580cdf2f..616196ad5ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -57,7 +57,7 @@ using namespace llvm;
static cl::opt<unsigned> UnrollThresholdPrivate(
"amdgpu-unroll-threshold-private",
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
- cl::init(2500), cl::Hidden);
+ cl::init(2000), cl::Hidden);
static cl::opt<unsigned> UnrollThresholdLocal(
"amdgpu-unroll-threshold-local",
@@ -693,6 +693,39 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
+unsigned GCNTTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) {
+ // Estimate extractelement elimination
+ if (const ExtractElementInst *EE = dyn_cast<ExtractElementInst>(U)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(EE->getOperand(1));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return getVectorInstrCost(EE->getOpcode(), EE->getOperand(0)->getType(),
+ Idx);
+ }
+
+ // Estimate insertelement elimination
+ if (const InsertElementInst *IE = dyn_cast<InsertElementInst>(U)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return getVectorInstrCost(IE->getOpcode(), IE->getType(), Idx);
+ }
+
+ // Estimate different intrinsics, e.g. llvm.fabs
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ SmallVector<Value *, 4> Args(II->arg_operands());
+ FastMathFlags FMF;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+ return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
+ FMF);
+ }
+ return BaseT::getUserCost(U, Operands);
+}
+
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index c7cd2f2c7bf..67f7f9074f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -204,7 +204,7 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- unsigned getInliningThresholdMultiplier() { return 7; }
+ unsigned getInliningThresholdMultiplier() { return 9; }
int getInlinerVectorBonusPercent() { return 0; }
@@ -214,6 +214,7 @@ public:
int getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwiseForm,
bool IsUnsigned);
+ unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
};
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
OpenPOWER on IntegriCloud