summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 68
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4
2 files changed, 7 insertions, 65 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 4540d51532e..896ac9c8777 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -46,11 +46,8 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
- SmallVector<CallGraphNode*, 8> NodeList;
bool addFeatureAttributes(Function &F);
- bool processUniformWorkGroupAttribute();
- bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
public:
static char ID;
@@ -189,6 +186,7 @@ static bool handleAttr(Function &Parent, const Function &Callee,
Parent.addFnAttr(Name);
return true;
}
+
return false;
}
@@ -215,56 +213,6 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
handleAttr(Parent, Callee, AttrName);
}
-bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
- bool Changed = false;
-
- for (auto *Node : reverse(NodeList)) {
- Function *Caller = Node->getFunction();
-
- for (auto I : *Node) {
- Function *Callee = std::get<1>(I)->getFunction();
- if (Callee)
- Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
- }
- }
-
- return Changed;
-}
-
-bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
- Function &Caller, Function &Callee) {
-
- // Check for externally defined function
- if (!Callee.hasExactDefinition()) {
- Callee.addFnAttr("uniform-work-group-size", "false");
- if (!Caller.hasFnAttribute("uniform-work-group-size"))
- Caller.addFnAttr("uniform-work-group-size", "false");
-
- return true;
- }
- // Check if the Caller has the attribute
- if (Caller.hasFnAttribute("uniform-work-group-size")) {
- // Check if the value of the attribute is true
- if (Caller.getFnAttribute("uniform-work-group-size")
- .getValueAsString().equals("true")) {
- // Propagate the attribute to the Callee, if it does not have it
- if (!Callee.hasFnAttribute("uniform-work-group-size")) {
- Callee.addFnAttr("uniform-work-group-size", "true");
- return true;
- }
- } else {
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- } else {
- // If the attribute is absent, set it as false
- Caller.addFnAttr("uniform-work-group-size", "false");
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- return false;
-}
-
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
bool HasFlat = ST.hasFlatAddressSpace();
@@ -345,19 +293,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
}
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
+ Module &M = SCC.getCallGraph().getModule();
+ Triple TT(M.getTargetTriple());
+
bool Changed = false;
-
for (CallGraphNode *I : SCC) {
- // Build a list of CallGraphNodes from most number of uses to least
- if (I->getNumReferences())
- NodeList.push_back(I);
- else
- processUniformWorkGroupAttribute();
-
- Function *F = I->getFunction();
- // Add feature attributes
+ Function *F = I->getFunction();
if (!F || F->isDeclaration())
continue;
+
Changed |= addFeatureAttributes(*F);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ef3540b499a..70d365f4ad7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -683,9 +683,6 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- if (TM->getTargetTriple().getArch() == Triple::amdgcn)
- addPass(createAMDGPUAnnotateKernelFeaturesPass());
-
if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
EnableLowerKernelArguments)
addPass(createAMDGPULowerKernelArgumentsPass());
@@ -773,6 +770,7 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
+ addPass(createAMDGPUAnnotateKernelFeaturesPass());
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
OpenPOWER on IntegriCloud