diff options
| author | Vlad Tsyrklevich <vlad@tsyrklevich.net> | 2018-07-10 00:46:07 +0000 |
|---|---|---|
| committer | Vlad Tsyrklevich <vlad@tsyrklevich.net> | 2018-07-10 00:46:07 +0000 |
| commit | 688e752207acc4308eae8a83b489366ec88793c9 (patch) | |
| tree | 7828e023c257b5d6fd5a0b6a31935ec187e47638 /llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | |
| parent | 638426fc36e08ccee78605a4d8136757ca0faf12 (diff) | |
| download | bcm5719-llvm-688e752207acc4308eae8a83b489366ec88793c9.tar.gz bcm5719-llvm-688e752207acc4308eae8a83b489366ec88793c9.zip | |
Revert "AMDGPU: Force inlining if LDS global address is used"
This reverts commit r336587, it was causing test failures on the
sanitizer bots.
llvm-svn: 336623
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 108 |
1 file changed, 21 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index d4bbb2c1eb8..c27425443ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -14,9 +14,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 
@@ -33,18 +30,13 @@ static cl::opt<bool> StressCalls(
 class AMDGPUAlwaysInline : public ModulePass {
   bool GlobalOpt;
 
-  void recursivelyVisitUsers(GlobalValue &GV,
-                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
 public:
   static char ID;
 
   AMDGPUAlwaysInline(bool GlobalOpt = false) :
     ModulePass(ID), GlobalOpt(GlobalOpt) { }
   bool runOnModule(Module &M) override;
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesAll();
-  }
+  StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
 };
 
 } // End anonymous namespace
@@ -54,53 +46,15 @@ INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
 
 char AMDGPUAlwaysInline::ID = 0;
 
-void AMDGPUAlwaysInline::recursivelyVisitUsers(
-  GlobalValue &GV,
-  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
-  SmallVector<User *, 16> Stack;
-
-  SmallPtrSet<const Value *, 8> Visited;
-
-  for (User *U : GV.users())
-    Stack.push_back(U);
-
-  while (!Stack.empty()) {
-    User *U = Stack.pop_back_val();
-    if (!Visited.insert(U).second)
-      continue;
-
-    if (Instruction *I = dyn_cast<Instruction>(U)) {
-      Function *F = I->getParent()->getParent();
-      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
-        FuncsToAlwaysInline.insert(F);
-        Stack.push_back(F);
-      }
-
-      // No need to look at further users, but we do need to inline any callers.
-      continue;
-    }
-
-    for (User *UU : U->users())
-      Stack.push_back(UU);
-  }
-}
-
 bool AMDGPUAlwaysInline::runOnModule(Module &M) {
-  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
-
   std::vector<GlobalAlias*> AliasesToRemove;
-
-  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
-  SmallPtrSet<Function *, 8> FuncsToNoInline;
+  std::vector<Function *> FuncsToClone;
 
   for (GlobalAlias &A : M.aliases()) {
     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
       A.replaceAllUsesWith(F);
       AliasesToRemove.push_back(&A);
     }
-
-    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
-    // cast that won't be inlined through.
   }
 
   if (GlobalOpt) {
@@ -109,51 +63,31 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
     }
   }
 
-  // Always force inlining of any function that uses an LDS global address. This
-  // is something of a workaround because we don't have a way of supporting LDS
-  // objects defined in functions. LDS is always allocated by a kernel, and it
-  // is difficult to manage LDS usage if a function may be used by multiple
-  // kernels.
-  //
-  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
-  // should only appear when IPO passes manages to move LDs defined in a kernel
-  // into a single user function.
-
-  for (GlobalVariable &GV : M.globals()) {
-    // TODO: Region address
-    unsigned AS = GV.getType()->getAddressSpace();
-    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
-      continue;
-
-    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
-  }
-
+  auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
+  auto IncompatAttr
+    = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
 
-  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
-    auto IncompatAttr
-      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
-
-    for (Function &F : M) {
-      if (!F.isDeclaration() && !F.use_empty() &&
-          !F.hasFnAttribute(IncompatAttr)) {
-        if (StressCalls) {
-          if (!FuncsToAlwaysInline.count(&F))
-            FuncsToNoInline.insert(&F);
-        } else
-          FuncsToAlwaysInline.insert(&F);
-      }
-    }
+  for (Function &F : M) {
+    if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
+        !F.hasFnAttribute(IncompatAttr))
+      FuncsToClone.push_back(&F);
   }
 
-  for (Function *F : FuncsToAlwaysInline)
-    F->addFnAttr(Attribute::AlwaysInline);
-
-  for (Function *F : FuncsToNoInline)
-    F->addFnAttr(Attribute::NoInline);
+  for (Function *F : FuncsToClone) {
+    ValueToValueMapTy VMap;
+    Function *NewFunc = CloneFunction(F, VMap);
+    NewFunc->setLinkage(GlobalValue::InternalLinkage);
+    F->replaceAllUsesWith(NewFunc);
+  }
 
-  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
+  for (Function &F : M) {
+    if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
+      F.addFnAttr(NewAttr);
+    }
+  }
+  return false;
 }
 
 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
   return new AMDGPUAlwaysInline(GlobalOpt);
 }

