2 files changed, 7 insertions, 65 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 4540d51532e..896ac9c8777 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -46,11 +46,8 @@ namespace {
 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
 private:
   const TargetMachine *TM = nullptr;
-  SmallVector<CallGraphNode*, 8> NodeList;
 
   bool addFeatureAttributes(Function &F);
-  bool processUniformWorkGroupAttribute();
-  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
 
 public:
   static char ID;
@@ -189,6 +186,7 @@ static bool handleAttr(Function &Parent, const Function &Callee,
     Parent.addFnAttr(Name);
     return true;
   }
+
   return false;
 }
 
@@ -215,56 +213,6 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
     handleAttr(Parent, Callee, AttrName);
 }
 
-bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
-  bool Changed = false;
-
-  for (auto *Node : reverse(NodeList)) {
-    Function *Caller = Node->getFunction();
-
-    for (auto I : *Node) {
-      Function *Callee = std::get<1>(I)->getFunction();
-      if (Callee)
-        Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
-    }
-  }
-
-  return Changed;
-}
-
-bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
-       Function &Caller, Function &Callee) {
-
-  // Check for externally defined function
-  if (!Callee.hasExactDefinition()) {
-    Callee.addFnAttr("uniform-work-group-size", "false");
-    if (!Caller.hasFnAttribute("uniform-work-group-size")) 
-      Caller.addFnAttr("uniform-work-group-size", "false");
-     
-    return true;
-  }
-  // Check if the Caller has the attribute
-  if (Caller.hasFnAttribute("uniform-work-group-size")) {
-    // Check if the value of the attribute is true
-    if (Caller.getFnAttribute("uniform-work-group-size")
-        .getValueAsString().equals("true")) {
-      // Propagate the attribute to the Callee, if it does not have it
-      if (!Callee.hasFnAttribute("uniform-work-group-size")) {
-        Callee.addFnAttr("uniform-work-group-size", "true");
-        return true;
-      }
-    } else {
-      Callee.addFnAttr("uniform-work-group-size", "false");
-      return true;
-    }
-  } else {
-    // If the attribute is absent, set it as false
-    Caller.addFnAttr("uniform-work-group-size", "false");
-    Callee.addFnAttr("uniform-work-group-size", "false");
-    return true;
-  }
-  return false;
-}
-
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
   bool HasFlat = ST.hasFlatAddressSpace();
@@ -345,19 +293,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
 }
 
 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
+  Module &M = SCC.getCallGraph().getModule();
+  Triple TT(M.getTargetTriple());
+
   bool Changed = false;
- 
   for (CallGraphNode *I : SCC) {
-    // Build a list of CallGraphNodes from most number of uses to least
-    if (I->getNumReferences())
-      NodeList.push_back(I);
-    else
-      processUniformWorkGroupAttribute();
-
-    Function *F = I->getFunction();    
-    // Add feature attributes
+    Function *F = I->getFunction();
     if (!F || F->isDeclaration())
       continue;
+
     Changed |= addFeatureAttributes(*F);
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ef3540b499a..70d365f4ad7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -683,9 +683,6 @@ void AMDGPUPassConfig::addIRPasses() {
 }
 
 void AMDGPUPassConfig::addCodeGenPrepare() {
-  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
-    addPass(createAMDGPUAnnotateKernelFeaturesPass());
-
   if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());
@@ -773,6 +770,7 @@ bool GCNPassConfig::addPreISel() {
 
   // FIXME: We need to run a pass to propagate the attributes when calls are
   // supported.
+  addPass(createAMDGPUAnnotateKernelFeaturesPass());
 
   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.