| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-07-09 19:22:22 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-07-09 19:22:22 +0000 | 
| commit | 40cb6cab563372d0a0b1bc8c127503508dc114e6 (patch) | |
| tree | 33a3ddf313e3cb7eff250174be7c0bfd7d85db29 /llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | |
| parent | dc73f512ae77888fac7f80af9bdc44a7208fbce8 (diff) | |
AMDGPU: Force inlining if LDS global address is used
Uses of LDS global addresses from non-kernel functions won't work for the foreseeable future. They aren't allowed in OpenCL, but IPO optimizations can make them appear.
Also, directly set the attributes on functions regardless of linkage, rather than cloning functions as before.
llvm-svn: 336587
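
The core idea is easier to read outside the diff below. The following is a minimal standalone sketch of what the message describes, not the patch itself: it hard-codes LDS as address space 3, uses a simplified kernel check (`AMDGPU_KERNEL`/`SPIR_KERNEL` calling conventions instead of `AMDGPU::isEntryFunctionCC`), and the helper name `forceInlineLDSUsers` is made up for illustration.

```cpp
// Sketch only: mark every non-kernel function that (transitively) uses an
// LDS global as alwaysinline, setting the attribute in place instead of
// cloning the function.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Assumption: LDS is address space 3 on AMDGPU; the real pass queries the
// target's address-space mapping instead of hard-coding it.
static constexpr unsigned LocalAS = 3;

// Simplified kernel check; the in-tree pass uses AMDGPU::isEntryFunctionCC,
// which covers more entry-point calling conventions.
static bool isKernel(const Function &F) {
  CallingConv::ID CC = F.getCallingConv();
  return CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
}

static bool forceInlineLDSUsers(Module &M) {
  SmallPtrSet<Function *, 8> ToInline;

  for (GlobalVariable &GV : M.globals()) {
    if (GV.getType()->getAddressSpace() != LocalAS)
      continue;

    // Walk users through constant expressions down to instructions; any
    // non-kernel function reached must be inlined, and so must its callers.
    SmallVector<User *, 16> Stack(GV.user_begin(), GV.user_end());
    SmallPtrSet<User *, 16> Visited;
    while (!Stack.empty()) {
      User *U = Stack.pop_back_val();
      if (!Visited.insert(U).second)
        continue;
      if (auto *I = dyn_cast<Instruction>(U)) {
        Function *F = I->getFunction();
        if (!isKernel(*F)) {
          ToInline.insert(F);
          Stack.push_back(F); // also visit F's callers via F's uses
        }
        continue;
      }
      for (User *UU : U->users())
        Stack.push_back(UU);
    }
  }

  // Attributes are added directly, regardless of linkage; no cloning.
  for (Function *F : ToInline)
    F->addFnAttr(Attribute::AlwaysInline);
  return !ToInline.empty();
}
```

The actual pass below additionally handles the region address space, function aliases, and the StressCalls stress-testing mode.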
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 108 |

1 file changed, 87 insertions, 21 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index c27425443ab..d4bbb2c1eb8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -14,6 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -30,13 +33,18 @@ static cl::opt<bool> StressCalls(
 class AMDGPUAlwaysInline : public ModulePass {
   bool GlobalOpt;
 
+  void recursivelyVisitUsers(GlobalValue &GV,
+                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
 public:
   static char ID;
 
   AMDGPUAlwaysInline(bool GlobalOpt = false) :
     ModulePass(ID), GlobalOpt(GlobalOpt) { }
   bool runOnModule(Module &M) override;
-  StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+ }
 };
 
 } // End anonymous namespace
@@ -46,15 +54,53 @@ INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
 
 char AMDGPUAlwaysInline::ID = 0;
 
+void AMDGPUAlwaysInline::recursivelyVisitUsers(
+  GlobalValue &GV,
+  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
+  SmallVector<User *, 16> Stack;
+
+  SmallPtrSet<const Value *, 8> Visited;
+
+  for (User *U : GV.users())
+    Stack.push_back(U);
+
+  while (!Stack.empty()) {
+    User *U = Stack.pop_back_val();
+    if (!Visited.insert(U).second)
+      continue;
+
+    if (Instruction *I = dyn_cast<Instruction>(U)) {
+      Function *F = I->getParent()->getParent();
+      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
+        FuncsToAlwaysInline.insert(F);
+        Stack.push_back(F);
+      }
+
+      // No need to look at further users, but we do need to inline any callers.
+      continue;
+    }
+
+    for (User *UU : U->users())
+      Stack.push_back(UU);
+  }
+}
+
 bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
+
   std::vector<GlobalAlias*> AliasesToRemove;
-  std::vector<Function *> FuncsToClone;
+
+  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
+  SmallPtrSet<Function *, 8> FuncsToNoInline;
 
   for (GlobalAlias &A : M.aliases()) {
     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
       A.replaceAllUsesWith(F);
       AliasesToRemove.push_back(&A);
     }
+
+    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
+    // cast that won't be inlined through.
   }
 
   if (GlobalOpt) {
@@ -63,31 +109,51 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
     }
   }
 
-  auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
-  auto IncompatAttr
-    = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
-
-  for (Function &F : M) {
-    if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
-        !F.hasFnAttribute(IncompatAttr))
-      FuncsToClone.push_back(&F);
-  }
-
-  for (Function *F : FuncsToClone) {
-    ValueToValueMapTy VMap;
-    Function *NewFunc = CloneFunction(F, VMap);
-    NewFunc->setLinkage(GlobalValue::InternalLinkage);
-    F->replaceAllUsesWith(NewFunc);
+  // Always force inlining of any function that uses an LDS global address. This
+  // is something of a workaround because we don't have a way of supporting LDS
+  // objects defined in functions. LDS is always allocated by a kernel, and it
+  // is difficult to manage LDS usage if a function may be used by multiple
+  // kernels.
+  //
+  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
+  // should only appear when IPO passes manages to move LDs defined in a kernel
+  // into a single user function.
+
+  for (GlobalVariable &GV : M.globals()) {
+    // TODO: Region address
+    unsigned AS = GV.getType()->getAddressSpace();
+    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
+      continue;
+
+    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
   }
 
-  for (Function &F : M) {
-    if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
-      F.addFnAttr(NewAttr);
+  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
+    auto IncompatAttr
+      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
+
+    for (Function &F : M) {
+      if (!F.isDeclaration() && !F.use_empty() &&
+          !F.hasFnAttribute(IncompatAttr)) {
+        if (StressCalls) {
+          if (!FuncsToAlwaysInline.count(&F))
+            FuncsToNoInline.insert(&F);
+        } else
+          FuncsToAlwaysInline.insert(&F);
+      }
     }
   }
-  return false;
+
+  for (Function *F : FuncsToAlwaysInline)
+    F->addFnAttr(Attribute::AlwaysInline);
+
+  for (Function *F : FuncsToNoInline)
+    F->addFnAttr(Attribute::NoInline);
+
+  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
 }
 
 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
   return new AMDGPUAlwaysInline(GlobalOpt);
 }
+
```
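
For reference, the pass is still created through the existing factory function, so a test driver could schedule it with the legacy pass manager roughly as follows. This is an illustrative snippet, not part of the commit; in-tree, the AMDGPU backend adds the pass from its own pipeline, and the declaration normally comes from AMDGPU.h.

```cpp
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"

namespace llvm {
// Normally declared in AMDGPU.h; repeated here so the snippet is self-contained.
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt);
} // namespace llvm

// Run the always-inline pass over a module, mirroring the GlobalOpt flag the
// constructor takes in the patch above.
void runAMDGPUAlwaysInline(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createAMDGPUAlwaysInlinePass(/*GlobalOpt=*/true));
  PM.run(M);
}
```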

