summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 54
1 files changed, 35 insertions, 19 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index fde2132b40b..0930ed1c8a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
+//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,39 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetTransformInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/CostTable.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "AMDGPUtti"
@@ -54,7 +78,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
if (!L->contains(I))
continue;
if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
- if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
+ if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
return SubLoop->contains(PHI); }))
return true;
} else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
@@ -66,7 +90,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
- UP.MaxCount = UINT_MAX;
+ UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.Partial = true;
// TODO: Do we want runtime unrolling?
@@ -81,12 +105,11 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
- if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
+ if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
return SubLoop->contains(BB); }))
continue; // Block belongs to an inner loop.
for (const Instruction &I : *BB) {
-
// Unroll a loop which contains an "if" statement whose condition
// defined by a PHI belonging to the loop. This may help to eliminate
// if region and potentially even PHI itself, saving on both divergence
@@ -153,7 +176,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (!Inst || L->isLoopInvariant(Op))
continue;
- if (any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
+ if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
return SubLoop->contains(Inst); }))
continue;
HasLoopDef = true;
@@ -268,7 +291,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
-
EVT OrigTy = TLI->getValueType(DL, Ty);
if (!OrigTy.isSimple()) {
return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
@@ -289,25 +311,23 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
switch (ISD) {
case ISD::SHL:
case ISD::SRL:
- case ISD::SRA: {
+ case ISD::SRA:
if (SLT == MVT::i64)
return get64BitInstrCost() * LT.first * NElts;
// i32
return getFullRateInstrCost() * LT.first * NElts;
- }
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
- case ISD::XOR: {
+ case ISD::XOR:
if (SLT == MVT::i64){
// and, or and xor are typically split into 2 VALU instructions.
return 2 * getFullRateInstrCost() * LT.first * NElts;
}
return LT.first * NElts * getFullRateInstrCost();
- }
case ISD::MUL: {
const int QuarterRateCost = getQuarterRateInstrCost();
if (SLT == MVT::i64) {
@@ -327,7 +347,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
if (SLT == MVT::f32 || SLT == MVT::f16)
return LT.first * NElts * getFullRateInstrCost();
break;
-
case ISD::FDIV:
case ISD::FREM:
// FIXME: frem should be handled separately. The fdiv in it is most of it,
@@ -348,7 +367,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
return LT.first * NElts * Cost;
}
-
break;
default:
break;
@@ -465,11 +483,9 @@ static bool isArgPassedInSGPR(const Argument *A) {
}
}
-///
/// \returns true if the result of the value could potentially be
/// different across workitems in a wavefront.
bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
-
if (const Argument *A = dyn_cast<Argument>(V))
return !isArgPassedInSGPR(A);
OpenPOWER on IntegriCloud