path: root/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
author     Matt Arsenault <Matthew.Arsenault@amd.com>  2018-07-09 19:22:22 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2018-07-09 19:22:22 +0000
commit     40cb6cab563372d0a0b1bc8c127503508dc114e6 (patch)
tree       33a3ddf313e3cb7eff250174be7c0bfd7d85db29 /llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
parent     dc73f512ae77888fac7f80af9bdc44a7208fbce8 (diff)
AMDGPU: Force inlining if LDS global address is used
These won't work for the foreseeable future. These aren't allowed in OpenCL, but IPO optimizations can make them appear.

Also, directly set the attributes on functions regardless of linkage, rather than cloning functions as before.

llvm-svn: 336587
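For illustration, here is a minimal standalone sketch (not part of the commit) of the attribute-based approach this change switches to: instead of cloning a function into an internal copy, the alwaysinline attribute is set directly on any function whose instructions refer to the LDS global. The helper name forceInlineUsers is hypothetical, and it is simplified; the actual pass also walks constant-expression users and transitively marks callers (see recursivelyVisitUsers in the diff below).

// Hedged sketch: hypothetical helper, simplified from the pass below.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Mark every function that directly uses GV as alwaysinline. The real pass
// additionally follows constant-expression users and inlines the callers of
// any function it marks.
static void forceInlineUsers(GlobalVariable &GV) {
  for (User *U : GV.users()) {
    if (auto *I = dyn_cast<Instruction>(U)) {
      Function *F = I->getFunction();
      // AlwaysInline and NoInline are mutually exclusive; skip functions
      // already marked noinline.
      if (!F->hasFnAttribute(Attribute::NoInline))
        F->addFnAttr(Attribute::AlwaysInline);
    }
  }
}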
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 108
1 file changed, 87 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index c27425443ab..d4bbb2c1eb8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -14,6 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 
@@ -30,13 +33,18 @@ static cl::opt<bool> StressCalls(
 class AMDGPUAlwaysInline : public ModulePass {
   bool GlobalOpt;
 
+  void recursivelyVisitUsers(GlobalValue &GV,
+                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
 public:
   static char ID;
 
   AMDGPUAlwaysInline(bool GlobalOpt = false) :
     ModulePass(ID), GlobalOpt(GlobalOpt) { }
   bool runOnModule(Module &M) override;
-  StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
 };
 
 } // End anonymous namespace
@@ -46,15 +54,53 @@ INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
 
 char AMDGPUAlwaysInline::ID = 0;
 
+void AMDGPUAlwaysInline::recursivelyVisitUsers(
+  GlobalValue &GV,
+  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
+  SmallVector<User *, 16> Stack;
+
+  SmallPtrSet<const Value *, 8> Visited;
+
+  for (User *U : GV.users())
+    Stack.push_back(U);
+
+  while (!Stack.empty()) {
+    User *U = Stack.pop_back_val();
+    if (!Visited.insert(U).second)
+      continue;
+
+    if (Instruction *I = dyn_cast<Instruction>(U)) {
+      Function *F = I->getParent()->getParent();
+      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
+        FuncsToAlwaysInline.insert(F);
+        Stack.push_back(F);
+      }
+
+      // No need to look at further users, but we do need to inline any callers.
+      continue;
+    }
+
+    for (User *UU : U->users())
+      Stack.push_back(UU);
+  }
+}
+
 bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
+
   std::vector<GlobalAlias*> AliasesToRemove;
-  std::vector<Function *> FuncsToClone;
+
+  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
+  SmallPtrSet<Function *, 8> FuncsToNoInline;
 
   for (GlobalAlias &A : M.aliases()) {
     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
       A.replaceAllUsesWith(F);
       AliasesToRemove.push_back(&A);
     }
+
+    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
+    // cast that won't be inlined through.
   }
 
   if (GlobalOpt) {
@@ -63,31 +109,51 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
     }
   }
 
-  auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
-  auto IncompatAttr
-    = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
-
-  for (Function &F : M) {
-    if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
-        !F.hasFnAttribute(IncompatAttr))
-      FuncsToClone.push_back(&F);
-  }
-
-  for (Function *F : FuncsToClone) {
-    ValueToValueMapTy VMap;
-    Function *NewFunc = CloneFunction(F, VMap);
-    NewFunc->setLinkage(GlobalValue::InternalLinkage);
-    F->replaceAllUsesWith(NewFunc);
+  // Always force inlining of any function that uses an LDS global address. This
+  // is something of a workaround because we don't have a way of supporting LDS
+  // objects defined in functions. LDS is always allocated by a kernel, and it
+  // is difficult to manage LDS usage if a function may be used by multiple
+  // kernels.
+  //
+  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
+  // should only appear when IPO passes manage to move LDS defined in a kernel
+  // into a single user function.
+
+  for (GlobalVariable &GV : M.globals()) {
+    // TODO: Region address
+    unsigned AS = GV.getType()->getAddressSpace();
+    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
+      continue;
+
+    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
   }
 
-  for (Function &F : M) {
-    if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
-      F.addFnAttr(NewAttr);
+  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
+    auto IncompatAttr
+      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
+
+    for (Function &F : M) {
+      if (!F.isDeclaration() && !F.use_empty() &&
+          !F.hasFnAttribute(IncompatAttr)) {
+        if (StressCalls) {
+          if (!FuncsToAlwaysInline.count(&F))
+            FuncsToNoInline.insert(&F);
+        } else
+          FuncsToAlwaysInline.insert(&F);
+      }
     }
   }
-  return false;
+
+  for (Function *F : FuncsToAlwaysInline)
+    F->addFnAttr(Attribute::AlwaysInline);
+
+  for (Function *F : FuncsToNoInline)
+    F->addFnAttr(Attribute::NoInline);
+
+  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
 }
 
 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
   return new AMDGPUAlwaysInline(GlobalOpt);
 }
+
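As a usage note, the createAMDGPUAlwaysInlinePass entry point shown at the end of the diff can be scheduled like any legacy module pass. Below is a minimal sketch assuming a loaded Module M; the driver function name is illustrative and not from the commit, and in-tree the AMDGPU target machine schedules this pass itself rather than requiring a manual driver.

// Hypothetical driver: runs the pass over a module with the legacy
// pass manager. Only createAMDGPUAlwaysInlinePass comes from the patch.
#include "AMDGPU.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

void runAlwaysInline(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  // GlobalOpt=true also erases the aliases the pass replaces with their
  // aliasee functions.
  PM.add(llvm::createAMDGPUAlwaysInlinePass(/*GlobalOpt=*/true));
  PM.run(M);
}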