diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 68 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 |
2 files changed, 7 insertions, 65 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 4540d51532e..896ac9c8777 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -46,11 +46,8 @@ namespace { class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { private: const TargetMachine *TM = nullptr; - SmallVector<CallGraphNode*, 8> NodeList; bool addFeatureAttributes(Function &F); - bool processUniformWorkGroupAttribute(); - bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee); public: static char ID; @@ -189,6 +186,7 @@ static bool handleAttr(Function &Parent, const Function &Callee, Parent.addFnAttr(Name); return true; } + return false; } @@ -215,56 +213,6 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee, handleAttr(Parent, Callee, AttrName); } -bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() { - bool Changed = false; - - for (auto *Node : reverse(NodeList)) { - Function *Caller = Node->getFunction(); - - for (auto I : *Node) { - Function *Callee = std::get<1>(I)->getFunction(); - if (Callee) - Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee); - } - } - - return Changed; -} - -bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute( - Function &Caller, Function &Callee) { - - // Check for externally defined function - if (!Callee.hasExactDefinition()) { - Callee.addFnAttr("uniform-work-group-size", "false"); - if (!Caller.hasFnAttribute("uniform-work-group-size")) - Caller.addFnAttr("uniform-work-group-size", "false"); - - return true; - } - // Check if the Caller has the attribute - if (Caller.hasFnAttribute("uniform-work-group-size")) { - // Check if the value of the attribute is true - if (Caller.getFnAttribute("uniform-work-group-size") - .getValueAsString().equals("true")) { - // Propagate the attribute to the Callee, if it does not have it - if (!Callee.hasFnAttribute("uniform-work-group-size")) { - Callee.addFnAttr("uniform-work-group-size", "true"); - return true; - } - } else { - Callee.addFnAttr("uniform-work-group-size", "false"); - return true; - } - } else { - // If the attribute is absent, set it as false - Caller.addFnAttr("uniform-work-group-size", "false"); - Callee.addFnAttr("uniform-work-group-size", "false"); - return true; - } - return false; -} - bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F); bool HasFlat = ST.hasFlatAddressSpace(); @@ -345,19 +293,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { } bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { + Module &M = SCC.getCallGraph().getModule(); + Triple TT(M.getTargetTriple()); + bool Changed = false; - for (CallGraphNode *I : SCC) { - // Build a list of CallGraphNodes from most number of uses to least - if (I->getNumReferences()) - NodeList.push_back(I); - else - processUniformWorkGroupAttribute(); - - Function *F = I->getFunction(); - // Add feature attributes + Function *F = I->getFunction(); if (!F || F->isDeclaration()) continue; + Changed |= addFeatureAttributes(*F); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ef3540b499a..70d365f4ad7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -683,9 +683,6 @@ void AMDGPUPassConfig::addIRPasses() { } void AMDGPUPassConfig::addCodeGenPrepare() { - if (TM->getTargetTriple().getArch() == Triple::amdgcn) - addPass(createAMDGPUAnnotateKernelFeaturesPass()); - if (TM->getTargetTriple().getArch() == Triple::amdgcn && EnableLowerKernelArguments) addPass(createAMDGPULowerKernelArgumentsPass()); @@ -773,6 +770,7 @@ bool GCNPassConfig::addPreISel() { // FIXME: We need to run a pass to propagate the attributes when calls are // supported. + addPass(createAMDGPUAnnotateKernelFeaturesPass()); // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit // regions formed by them. |