diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-17 17:47:28 +0000 | 
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-17 17:47:28 +0000 | 
| commit | ad04e7ad42663161ebc164cb0098826b38a4e0d2 (patch) | |
| tree | e43faf244c4be39838c6d877485adf7e9defd667 /llvm/lib | |
| parent | b8e8b1769ffa28fb09eb5e5e761840ece5531ba5 (diff) | |
| download | bcm5719-llvm-ad04e7ad42663161ebc164cb0098826b38a4e0d2.tar.gz bcm5719-llvm-ad04e7ad42663161ebc164cb0098826b38a4e0d2.zip | |
[AMDGPU] Pass to propagate ABI attributes from kernels to the functions
The pass works in two modes:
Mode 1: Just set attributes starting from kernels. This can work at
the very beginning of the opt and llc pipelines, but cannot clone functions
because it must be a function pass.
Mode 2: Actually clone functions for new attributes. This can only work
after all function passes in the opt pipeline because it has to be a
module pass.
Differential Revision: https://reviews.llvm.org/D63208
llvm-svn: 363586
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp | 336 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 | 
4 files changed, 356 insertions, 4 deletions
| diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index adac93dc11d..94dad0d7470 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -57,6 +57,8 @@ FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);  FunctionPass *createAMDGPUUseNativeCallsPass();  FunctionPass *createAMDGPUCodeGenPreparePass();  FunctionPass *createAMDGPUMachineCFGStructurizerPass(); +FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); +ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);  FunctionPass *createAMDGPURewriteOutArgumentsPass();  FunctionPass *createSIModeRegisterPass(); @@ -91,6 +93,12 @@ ModulePass *createAMDGPULowerKernelAttributesPass();  void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);  extern char &AMDGPULowerKernelAttributesID; +void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); +extern char &AMDGPUPropagateAttributesEarlyID; + +void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); +extern char &AMDGPUPropagateAttributesLateID; +  void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);  extern char &AMDGPURewriteOutArgumentsID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp new file mode 100644 index 00000000000..5e356013bb2 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -0,0 +1,336 @@ +//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This pass propagates attributes from kernels to the non-entry +/// functions. 
Most of the library functions were not compiled for specific ABI, +/// yet will be correctly compiled if proper attrbutes are propagated from the +/// caller. +/// +/// The pass analyzes call graph and propagates ABI target features through the +/// call graph. +/// +/// It can run in two modes: as a function or module pass. A function pass +/// simply propagates attributes. A module pass clones functions if there are +/// callers with different ABI. If a function is clonned all call sites will +/// be updated to use a correct clone. +/// +/// A function pass is limited in functionality but can run early in the +/// pipeline. A module pass is more powerful but has to run late, so misses +/// library folding opportunities. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "amdgpu-propagate-attributes" + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <string> + +using namespace llvm; + +namespace llvm { +extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; +} + +namespace { + +class AMDGPUPropagateAttributes { +  const FeatureBitset TargetFeatures = { +    AMDGPU::FeatureWavefrontSize16, +    AMDGPU::FeatureWavefrontSize32, +    AMDGPU::FeatureWavefrontSize64 +  }; + +  class Clone{ +  public: +    Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : +      FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} + +    FeatureBitset FeatureMask; +    Function *OrigF; +    Function *NewF; +  }; + +  const TargetMachine *TM; + +  // Clone functions as needed or just set attributes. +  bool AllowClone; + +  // Option propagation roots. 
+  SmallSet<Function *, 32> Roots; + +  // Clones of functions with their attributes. +  SmallVector<Clone, 32> Clones; + +  // Find a clone with required features. +  Function *findFunction(const FeatureBitset &FeaturesNeeded, +                         Function *OrigF); + +  // Clone function F and set NewFeatures on the clone. +  // Cole takes the name of original function. +  Function *cloneWithFeatures(Function &F, +                              const FeatureBitset &NewFeatures); + +  // Set new function's features in place. +  void setFeatures(Function &F, const FeatureBitset &NewFeatures); + +  std::string getFeatureString(const FeatureBitset &Features) const; + +  // Propagate attributes from Roots. +  bool process(); + +public: +  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : +    TM(TM), AllowClone(AllowClone) {} + +  // Use F as a root and propagate its attributes. +  bool process(Function &F); + +  // Propagate attributes starting from kernel functions. +  bool process(Module &M); +}; + +// Allows to propagate attributes early, but no clonning is allowed as it must +// be a function pass to run before any optimizations. +// TODO: We shall only need a one instance of module pass, but that needs to be +// in the linker pipeline which is currently not possible. +class AMDGPUPropagateAttributesEarly : public FunctionPass { +  const TargetMachine *TM; + +public: +  static char ID; // Pass identification + +  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : +    FunctionPass(ID), TM(TM) { +    initializeAMDGPUPropagateAttributesEarlyPass( +      *PassRegistry::getPassRegistry()); +  } + +  bool runOnFunction(Function &F) override; +}; + +// Allows to propagate attributes with clonning but does that late in the +// pipeline. 
+class AMDGPUPropagateAttributesLate : public ModulePass { +  const TargetMachine *TM; + +public: +  static char ID; // Pass identification + +  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : +    ModulePass(ID), TM(TM) { +    initializeAMDGPUPropagateAttributesLatePass( +      *PassRegistry::getPassRegistry()); +  } + +  bool runOnModule(Module &M) override; +}; + +}  // end anonymous namespace. + +char AMDGPUPropagateAttributesEarly::ID = 0; +char AMDGPUPropagateAttributesLate::ID = 0; + +INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, +                "amdgpu-propagate-attributes-early", +                "Early propagate attributes from kernels to functions", +                false, false) +INITIALIZE_PASS(AMDGPUPropagateAttributesLate, +                "amdgpu-propagate-attributes-late", +                "Late propagate attributes from kernels to functions", +                false, false) + +Function * +AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, +                                        Function *OrigF) { +  // TODO: search for clone's clones. 
+  for (Clone &C : Clones) +    if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) +      return C.NewF; + +  return nullptr; +} + +bool AMDGPUPropagateAttributes::process(Module &M) { +  for (auto &F : M.functions()) +    if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) +      Roots.insert(&F); + +  return process(); +} + +bool AMDGPUPropagateAttributes::process(Function &F) { +  Roots.insert(&F); +  return process(); +} + +bool AMDGPUPropagateAttributes::process() { +  bool Changed = false; +  SmallSet<Function *, 32> NewRoots; +  SmallSet<Function *, 32> Replaced; + +  if (Roots.empty()) +    return false; +  Module &M = *(*Roots.begin())->getParent(); + +  do { +    Roots.insert(NewRoots.begin(), NewRoots.end()); +    NewRoots.clear(); + +    for (auto &F : M.functions()) { +      if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F)) +        continue; + +      const FeatureBitset &CalleeBits = +        TM->getSubtargetImpl(F)->getFeatureBits(); +      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; + +      for (User *U : F.users()) { +        Instruction *I = dyn_cast<Instruction>(U); +        if (!I) +          continue; +        CallBase *CI = dyn_cast<CallBase>(I); +        if (!CI) +          continue; +        Function *Caller = CI->getCaller(); +        if (!Caller) +          continue; +        if (!Roots.count(Caller)) +          continue; + +        const FeatureBitset &CallerBits = +          TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; + +        if (CallerBits == (CalleeBits  & TargetFeatures)) { +          NewRoots.insert(&F); +          continue; +        } + +        Function *NewF = findFunction(CallerBits, &F); +        if (!NewF) { +          FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | +                                    CallerBits); +          if (!AllowClone) { +            // This may set different features on different iteartions if +            // there is a contradiction 
in callers' attributes. In this case +            // we rely on a second pass running on Module, which is allowed +            // to clone. +            setFeatures(F, NewFeatures); +            NewRoots.insert(&F); +            Changed = true; +            break; +          } + +          NewF = cloneWithFeatures(F, NewFeatures); +          Clones.push_back(Clone(CallerBits, &F, NewF)); +          NewRoots.insert(NewF); +        } + +        ToReplace.push_back(std::make_pair(CI, NewF)); +        Replaced.insert(&F); + +        Changed = true; +      } + +      while (!ToReplace.empty()) { +        auto R = ToReplace.pop_back_val(); +        R.first->setCalledFunction(R.second); +      } +    } +  } while (!NewRoots.empty()); + +  for (Function *F : Replaced) { +    if (F->use_empty()) +      F->eraseFromParent(); +  } + +  return Changed; +} + +Function * +AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, +                                             const FeatureBitset &NewFeatures) { +  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); + +  ValueToValueMapTy dummy; +  Function *NewF = CloneFunction(&F, dummy); +  setFeatures(*NewF, NewFeatures); + +  // Swap names. If that is the only clone it will retain the name of now +  // dead value. +  if (F.hasName()) { +    std::string NewName = NewF->getName(); +    NewF->takeName(&F); +    F.setName(NewName); + +    // Name has changed, it does not need an external symbol. 
+    F.setVisibility(GlobalValue::DefaultVisibility); +    F.setLinkage(GlobalValue::InternalLinkage); +  } + +  return NewF; +} + +void AMDGPUPropagateAttributes::setFeatures(Function &F, +                                            const FeatureBitset &NewFeatures) { +  std::string NewFeatureStr = getFeatureString(NewFeatures); + +  LLVM_DEBUG(dbgs() << "Set features " +                    << getFeatureString(NewFeatures & TargetFeatures) +                    << " on " << F.getName() << '\n'); + +  F.removeFnAttr("target-features"); +  F.addFnAttr("target-features", NewFeatureStr); +} + +std::string +AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const +{ +  std::string Ret; +  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { +    if (Features[KV.Value]) +      Ret += (StringRef("+") + KV.Key + ",").str(); +    else if (TargetFeatures[KV.Value]) +      Ret += (StringRef("-") + KV.Key + ",").str(); +  } +  Ret.pop_back(); // Remove last comma. +  return Ret; +} + +bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { +  if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) +    return false; + +  return AMDGPUPropagateAttributes(TM, false).process(F); +} + +bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { +  if (!TM) +    return false; + +  return AMDGPUPropagateAttributes(TM, true).process(M); +} + +FunctionPass +*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { +  return new AMDGPUPropagateAttributesEarly(TM); +} + +ModulePass +*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { +  return new AMDGPUPropagateAttributesLate(TM); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 41876eb7125..41a075756ee 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -217,6 +217,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {    
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);    initializeAMDGPUPromoteAllocaPass(*PR);    initializeAMDGPUCodeGenPreparePass(*PR); +  initializeAMDGPUPropagateAttributesEarlyPass(*PR); +  initializeAMDGPUPropagateAttributesLatePass(*PR);    initializeAMDGPURewriteOutArgumentsPass(*PR);    initializeAMDGPUUnifyMetadataPass(*PR);    initializeSIAnnotateControlFlowPass(*PR); @@ -402,13 +404,14 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {    Builder.addExtension(      PassManagerBuilder::EP_ModuleOptimizerEarly, -    [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &, -                                         legacy::PassManagerBase &PM) { +    [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &, +                                               legacy::PassManagerBase &PM) {        if (AMDGPUAA) {          PM.add(createAMDGPUAAWrapperPass());          PM.add(createAMDGPUExternalAAWrapperPass());        }        PM.add(createAMDGPUUnifyMetadataPass()); +      PM.add(createAMDGPUPropagateAttributesLatePass(this));        if (Internalize) {          PM.add(createInternalizePass(mustPreserveGV));          PM.add(createGlobalDCEPass()); @@ -420,12 +423,13 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {    const auto &Opt = Options;    Builder.addExtension(      PassManagerBuilder::EP_EarlyAsPossible, -    [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &, -                                      legacy::PassManagerBase &PM) { +    [AMDGPUAA, LibCallSimplify, &Opt, this](const PassManagerBuilder &, +                                            legacy::PassManagerBase &PM) {        if (AMDGPUAA) {          PM.add(createAMDGPUAAWrapperPass());          PM.add(createAMDGPUExternalAAWrapperPass());        } +      PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));        PM.add(llvm::createAMDGPUUseNativeCallsPass());        if (LibCallSimplify)          
PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt)); @@ -654,6 +658,9 @@ void AMDGPUPassConfig::addIRPasses() {    disablePass(&FuncletLayoutID);    disablePass(&PatchableFunctionID); +  // A call to propagate attributes pass in the backend in case opt was not run. +  addPass(createAMDGPUPropagateAttributesEarlyPass(&TM)); +    addPass(createAtomicExpandPass());    // This must occur before inlining, as the inliner will not look through diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index a229090c788..c1fa0ecd841 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen    AMDGPUMCInstLower.cpp    AMDGPUOpenCLEnqueuedBlockLowering.cpp    AMDGPUPromoteAlloca.cpp +  AMDGPUPropagateAttributes.cpp    AMDGPURegAsmNames.inc.cpp    AMDGPURegisterBankInfo.cpp    AMDGPURegisterInfo.cpp | 

