diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-17 17:47:28 +0000 | 
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-17 17:47:28 +0000 | 
| commit | ad04e7ad42663161ebc164cb0098826b38a4e0d2 (patch) | |
| tree | e43faf244c4be39838c6d877485adf7e9defd667 /llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp | |
| parent | b8e8b1769ffa28fb09eb5e5e761840ece5531ba5 (diff) | |
| download | bcm5719-llvm-ad04e7ad42663161ebc164cb0098826b38a4e0d2.tar.gz bcm5719-llvm-ad04e7ad42663161ebc164cb0098826b38a4e0d2.zip | |
[AMDGPU] Pass to propagate ABI attributes from kernels to the functions
The pass works in two modes:
Mode 1: Just set attributes starting from kernels. This can work at
the very beginning of opt and llc pipeline, but cannot clone functions
because it must be a function pass.
Mode 2: Actually clone functions for new attributes. This can only work
after all function passes in the opt pipeline because it has to be a
module pass.
Differential Revision: https://reviews.llvm.org/D63208
llvm-svn: 363586
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp | 336 | 
1 files changed, 336 insertions, 0 deletions
| diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp new file mode 100644 index 00000000000..5e356013bb2 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -0,0 +1,336 @@ +//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This pass propagates attributes from kernels to the non-entry +/// functions. Most of the library functions were not compiled for specific ABI, +/// yet will be correctly compiled if proper attrbutes are propagated from the +/// caller. +/// +/// The pass analyzes call graph and propagates ABI target features through the +/// call graph. +/// +/// It can run in two modes: as a function or module pass. A function pass +/// simply propagates attributes. A module pass clones functions if there are +/// callers with different ABI. If a function is clonned all call sites will +/// be updated to use a correct clone. +/// +/// A function pass is limited in functionality but can run early in the +/// pipeline. A module pass is more powerful but has to run late, so misses +/// library folding opportunities. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "amdgpu-propagate-attributes" + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <string> + +using namespace llvm; + +namespace llvm { +extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; +} + +namespace { + +class AMDGPUPropagateAttributes { +  const FeatureBitset TargetFeatures = { +    AMDGPU::FeatureWavefrontSize16, +    AMDGPU::FeatureWavefrontSize32, +    AMDGPU::FeatureWavefrontSize64 +  }; + +  class Clone{ +  public: +    Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : +      FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} + +    FeatureBitset FeatureMask; +    Function *OrigF; +    Function *NewF; +  }; + +  const TargetMachine *TM; + +  // Clone functions as needed or just set attributes. +  bool AllowClone; + +  // Option propagation roots. +  SmallSet<Function *, 32> Roots; + +  // Clones of functions with their attributes. +  SmallVector<Clone, 32> Clones; + +  // Find a clone with required features. +  Function *findFunction(const FeatureBitset &FeaturesNeeded, +                         Function *OrigF); + +  // Clone function F and set NewFeatures on the clone. +  // Cole takes the name of original function. +  Function *cloneWithFeatures(Function &F, +                              const FeatureBitset &NewFeatures); + +  // Set new function's features in place. +  void setFeatures(Function &F, const FeatureBitset &NewFeatures); + +  std::string getFeatureString(const FeatureBitset &Features) const; + +  // Propagate attributes from Roots. +  bool process(); + +public: +  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : +    TM(TM), AllowClone(AllowClone) {} + +  // Use F as a root and propagate its attributes. +  bool process(Function &F); + +  // Propagate attributes starting from kernel functions. +  bool process(Module &M); +}; + +// Allows to propagate attributes early, but no clonning is allowed as it must +// be a function pass to run before any optimizations. +// TODO: We shall only need a one instance of module pass, but that needs to be +// in the linker pipeline which is currently not possible. +class AMDGPUPropagateAttributesEarly : public FunctionPass { +  const TargetMachine *TM; + +public: +  static char ID; // Pass identification + +  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : +    FunctionPass(ID), TM(TM) { +    initializeAMDGPUPropagateAttributesEarlyPass( +      *PassRegistry::getPassRegistry()); +  } + +  bool runOnFunction(Function &F) override; +}; + +// Allows to propagate attributes with clonning but does that late in the +// pipeline. +class AMDGPUPropagateAttributesLate : public ModulePass { +  const TargetMachine *TM; + +public: +  static char ID; // Pass identification + +  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : +    ModulePass(ID), TM(TM) { +    initializeAMDGPUPropagateAttributesLatePass( +      *PassRegistry::getPassRegistry()); +  } + +  bool runOnModule(Module &M) override; +}; + +}  // end anonymous namespace. + +char AMDGPUPropagateAttributesEarly::ID = 0; +char AMDGPUPropagateAttributesLate::ID = 0; + +INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, +                "amdgpu-propagate-attributes-early", +                "Early propagate attributes from kernels to functions", +                false, false) +INITIALIZE_PASS(AMDGPUPropagateAttributesLate, +                "amdgpu-propagate-attributes-late", +                "Late propagate attributes from kernels to functions", +                false, false) + +Function * +AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, +                                        Function *OrigF) { +  // TODO: search for clone's clones. +  for (Clone &C : Clones) +    if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) +      return C.NewF; + +  return nullptr; +} + +bool AMDGPUPropagateAttributes::process(Module &M) { +  for (auto &F : M.functions()) +    if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) +      Roots.insert(&F); + +  return process(); +} + +bool AMDGPUPropagateAttributes::process(Function &F) { +  Roots.insert(&F); +  return process(); +} + +bool AMDGPUPropagateAttributes::process() { +  bool Changed = false; +  SmallSet<Function *, 32> NewRoots; +  SmallSet<Function *, 32> Replaced; + +  if (Roots.empty()) +    return false; +  Module &M = *(*Roots.begin())->getParent(); + +  do { +    Roots.insert(NewRoots.begin(), NewRoots.end()); +    NewRoots.clear(); + +    for (auto &F : M.functions()) { +      if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F)) +        continue; + +      const FeatureBitset &CalleeBits = +        TM->getSubtargetImpl(F)->getFeatureBits(); +      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; + +      for (User *U : F.users()) { +        Instruction *I = dyn_cast<Instruction>(U); +        if (!I) +          continue; +        CallBase *CI = dyn_cast<CallBase>(I); +        if (!CI) +          continue; +        Function *Caller = CI->getCaller(); +        if (!Caller) +          continue; +        if (!Roots.count(Caller)) +          continue; + +        const FeatureBitset &CallerBits = +          TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; + +        if (CallerBits == (CalleeBits  & TargetFeatures)) { +          NewRoots.insert(&F); +          continue; +        } + +        Function *NewF = findFunction(CallerBits, &F); +        if (!NewF) { +          FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | +                                    CallerBits); +          if (!AllowClone) { +            // This may set different features on different iteartions if +            // there is a contradiction in callers' attributes. In this case +            // we rely on a second pass running on Module, which is allowed +            // to clone. +            setFeatures(F, NewFeatures); +            NewRoots.insert(&F); +            Changed = true; +            break; +          } + +          NewF = cloneWithFeatures(F, NewFeatures); +          Clones.push_back(Clone(CallerBits, &F, NewF)); +          NewRoots.insert(NewF); +        } + +        ToReplace.push_back(std::make_pair(CI, NewF)); +        Replaced.insert(&F); + +        Changed = true; +      } + +      while (!ToReplace.empty()) { +        auto R = ToReplace.pop_back_val(); +        R.first->setCalledFunction(R.second); +      } +    } +  } while (!NewRoots.empty()); + +  for (Function *F : Replaced) { +    if (F->use_empty()) +      F->eraseFromParent(); +  } + +  return Changed; +} + +Function * +AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, +                                             const FeatureBitset &NewFeatures) { +  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); + +  ValueToValueMapTy dummy; +  Function *NewF = CloneFunction(&F, dummy); +  setFeatures(*NewF, NewFeatures); + +  // Swap names. If that is the only clone it will retain the name of now +  // dead value. +  if (F.hasName()) { +    std::string NewName = NewF->getName(); +    NewF->takeName(&F); +    F.setName(NewName); + +    // Name has changed, it does not need an external symbol. +    F.setVisibility(GlobalValue::DefaultVisibility); +    F.setLinkage(GlobalValue::InternalLinkage); +  } + +  return NewF; +} + +void AMDGPUPropagateAttributes::setFeatures(Function &F, +                                            const FeatureBitset &NewFeatures) { +  std::string NewFeatureStr = getFeatureString(NewFeatures); + +  LLVM_DEBUG(dbgs() << "Set features " +                    << getFeatureString(NewFeatures & TargetFeatures) +                    << " on " << F.getName() << '\n'); + +  F.removeFnAttr("target-features"); +  F.addFnAttr("target-features", NewFeatureStr); +} + +std::string +AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const +{ +  std::string Ret; +  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { +    if (Features[KV.Value]) +      Ret += (StringRef("+") + KV.Key + ",").str(); +    else if (TargetFeatures[KV.Value]) +      Ret += (StringRef("-") + KV.Key + ",").str(); +  } +  Ret.pop_back(); // Remove last comma. +  return Ret; +} + +bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { +  if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) +    return false; + +  return AMDGPUPropagateAttributes(TM, false).process(F); +} + +bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { +  if (!TM) +    return false; + +  return AMDGPUPropagateAttributes(TM, true).process(M); +} + +FunctionPass +*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { +  return new AMDGPUPropagateAttributesEarly(TM); +} + +ModulePass +*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { +  return new AMDGPUPropagateAttributesLate(TM); +} | 

