summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp      1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp   14
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp  67
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h    10
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp     2
5 files changed, 53 insertions, 41 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index afd3d506ade..b3589a02fcd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -139,7 +139,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
- AU.addRequired<AMDGPUPerfHintAnalysis>();
AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
AU.addRequired<DominatorTreeWrapperPass>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 05d5e2ec400..0d3a1f1a769 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -29,13 +29,13 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
// except reserved size is not correctly aligned.
const Function &F = MF.getFunction();
- if (auto *Resolver = MF.getMMI().getResolver()) {
- if (AMDGPUPerfHintAnalysis *PHA = static_cast<AMDGPUPerfHintAnalysis*>(
- Resolver->getAnalysisIfAvailable(&AMDGPUPerfHintAnalysisID, true))) {
- MemoryBound = PHA->isMemoryBound(&F);
- WaveLimiter = PHA->needsWaveLimiter(&F);
- }
- }
+ Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
+ MemoryBound = MemBoundAttr.isStringAttribute() &&
+ MemBoundAttr.getValueAsString() == "true";
+
+ Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
+ WaveLimiter = WaveLimitAttr.isStringAttribute() &&
+ WaveLimitAttr.getValueAsString() == "true";
CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index e9ebb912b08..e6d41236d44 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -17,6 +17,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -71,7 +72,7 @@ public:
const TargetLowering *TLI_)
: FIM(FIM_), DL(nullptr), TLI(TLI_) {}
- void runOnFunction(Function &F);
+ bool runOnFunction(Function &F);
private:
struct MemAccessInfo {
@@ -100,7 +101,7 @@ private:
const TargetLowering *TLI;
- void visit(const Function &F);
+ AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);
@@ -202,12 +203,8 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
return false;
}
-void AMDGPUPerfHint::visit(const Function &F) {
- auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo()));
- if (!FIP.second)
- return;
-
- AMDGPUPerfHintAnalysis::FuncInfo &FI = FIP.first->second;
+AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
+ AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');
@@ -233,7 +230,6 @@ void AMDGPUPerfHint::visit(const Function &F) {
if (&F == Callee) // Handle immediate recursion
continue;
- visit(*Callee);
auto Loc = FIM.find(Callee);
assert(Loc != FIM.end() && "No func info");
@@ -256,36 +252,39 @@ void AMDGPUPerfHint::visit(const Function &F) {
}
}
}
-}
-void AMDGPUPerfHint::runOnFunction(Function &F) {
- if (FIM.find(&F) != FIM.end())
- return;
+ return &FI;
+}
+bool AMDGPUPerfHint::runOnFunction(Function &F) {
const Module &M = *F.getParent();
DL = &M.getDataLayout();
- visit(F);
- auto Loc = FIM.find(&F);
+ if (F.hasFnAttribute("amdgpu-wave-limiter") &&
+ F.hasFnAttribute("amdgpu-memory-bound"))
+ return false;
- assert(Loc != FIM.end() && "No func info");
- LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Loc->second.MemInstCount
- << '\n'
- << " IAMInst: " << Loc->second.IAMInstCount << '\n'
- << " LSMInst: " << Loc->second.LSMInstCount << '\n'
- << " TotalInst: " << Loc->second.InstCount << '\n');
+ const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
- auto &FI = Loc->second;
+ LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
+ << '\n'
+ << " IAMInst: " << Info->IAMInstCount << '\n'
+ << " LSMInst: " << Info->LSMInstCount << '\n'
+ << " TotalInst: " << Info->InstCount << '\n');
- if (isMemBound(FI)) {
+ if (isMemBound(*Info)) {
LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
NumMemBound++;
+ F.addFnAttr("amdgpu-memory-bound", "true");
}
- if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(FI)) {
+ if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
NumLimitWave++;
+ F.addFnAttr("amdgpu-wave-limiter", "true");
}
+
+ return true;
}
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
@@ -364,17 +363,27 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
}
} // namespace
-bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) {
+bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
const TargetMachine &TM = TPC->getTM<TargetMachine>();
- const TargetSubtargetInfo *ST = TM.getSubtargetImpl(F);
- AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
- Analyzer.runOnFunction(F);
- return false;
+ bool Changed = false;
+ for (CallGraphNode *I : SCC) {
+ Function *F = I->getFunction();
+ if (!F || F->isDeclaration())
+ continue;
+
+ const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
+ AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
+
+ if (Analyzer.runOnFunction(*F))
+ Changed = true;
+ }
+
+ return Changed;
}
bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index 88b9c96a914..9599e09fbd9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -1,4 +1,4 @@
-//===- AMDGPUPerfHintAnalysis.h - analysis of functions memory traffic ----===//
+//===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,18 +14,20 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
+
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
namespace llvm {
-struct AMDGPUPerfHintAnalysis : public FunctionPass {
+struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
static char ID;
public:
- AMDGPUPerfHintAnalysis() : FunctionPass(ID) {}
+ AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
- bool runOnFunction(Function &F) override;
+ bool runOnSCC(CallGraphSCC &SCC) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 87a820a395d..7414519aee1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -740,6 +740,8 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
EnableLowerKernelArguments)
addPass(createAMDGPULowerKernelArgumentsPass());
+ addPass(&AMDGPUPerfHintAnalysisID);
+
TargetPassConfig::addCodeGenPrepare();
if (EnableLoadStoreVectorizer)
OpenPOWER on IntegriCloud