diff options
9 files changed, 31 insertions, 263 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 4540d51532e..896ac9c8777 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -46,11 +46,8 @@ namespace { class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { private: const TargetMachine *TM = nullptr; - SmallVector<CallGraphNode*, 8> NodeList; bool addFeatureAttributes(Function &F); - bool processUniformWorkGroupAttribute(); - bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee); public: static char ID; @@ -189,6 +186,7 @@ static bool handleAttr(Function &Parent, const Function &Callee, Parent.addFnAttr(Name); return true; } + return false; } @@ -215,56 +213,6 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee, handleAttr(Parent, Callee, AttrName); } -bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() { - bool Changed = false; - - for (auto *Node : reverse(NodeList)) { - Function *Caller = Node->getFunction(); - - for (auto I : *Node) { - Function *Callee = std::get<1>(I)->getFunction(); - if (Callee) - Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee); - } - } - - return Changed; -} - -bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute( - Function &Caller, Function &Callee) { - - // Check for externally defined function - if (!Callee.hasExactDefinition()) { - Callee.addFnAttr("uniform-work-group-size", "false"); - if (!Caller.hasFnAttribute("uniform-work-group-size")) - Caller.addFnAttr("uniform-work-group-size", "false"); - - return true; - } - // Check if the Caller has the attribute - if (Caller.hasFnAttribute("uniform-work-group-size")) { - // Check if the value of the attribute is true - if (Caller.getFnAttribute("uniform-work-group-size") - .getValueAsString().equals("true")) { - // Propagate the attribute to the Callee, if it does not have it - if (!Callee.hasFnAttribute("uniform-work-group-size")) { - Callee.addFnAttr("uniform-work-group-size", "true"); - return true; - } - } else { - Callee.addFnAttr("uniform-work-group-size", "false"); - return true; - } - } else { - // If the attribute is absent, set it as false - Caller.addFnAttr("uniform-work-group-size", "false"); - Callee.addFnAttr("uniform-work-group-size", "false"); - return true; - } - return false; -} - bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F); bool HasFlat = ST.hasFlatAddressSpace(); @@ -345,19 +293,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { } bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { + Module &M = SCC.getCallGraph().getModule(); + Triple TT(M.getTargetTriple()); + bool Changed = false; - for (CallGraphNode *I : SCC) { - // Build a list of CallGraphNodes from most number of uses to least - if (I->getNumReferences()) - NodeList.push_back(I); - else - processUniformWorkGroupAttribute(); - - Function *F = I->getFunction(); - // Add feature attributes + Function *F = I->getFunction(); if (!F || F->isDeclaration()) continue; + Changed |= addFeatureAttributes(*F); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ef3540b499a..70d365f4ad7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -683,9 +683,6 @@ void AMDGPUPassConfig::addIRPasses() { } void AMDGPUPassConfig::addCodeGenPrepare() { - if (TM->getTargetTriple().getArch() == Triple::amdgcn) - addPass(createAMDGPUAnnotateKernelFeaturesPass()); - if (TM->getTargetTriple().getArch() == Triple::amdgcn && EnableLowerKernelArguments) addPass(createAMDGPULowerKernelArgumentsPass()); @@ -773,6 +770,7 @@ bool GCNPassConfig::addPreISel() { // FIXME: We need to run a pass to propagate the attributes when calls are // supported. + addPass(createAMDGPUAnnotateKernelFeaturesPass()); // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit // regions formed by them. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index ca6739f5d37..e68b1794f28 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -244,52 +244,52 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { ret void } -; HSA: define void @use_implicitarg_ptr() #16 { +; HSA: define void @use_implicitarg_ptr() #15 { define void @use_implicitarg_ptr() #1 { %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef ret void } -; HSA: define void @func_indirect_use_implicitarg_ptr() #16 { +; HSA: define void @func_indirect_use_implicitarg_ptr() #15 { define void @func_indirect_use_implicitarg_ptr() #1 { call void @use_implicitarg_ptr() ret void } -; HSA: declare void @external.func() #17 +; HSA: declare void @external.func() #16 declare void @external.func() #3 -; HSA: define internal void @defined.func() #17 { +; HSA: define internal void @defined.func() #16 { define internal void @defined.func() #3 { ret void } -; HSA: define void @func_call_external() #17 { +; HSA: define void @func_call_external() #16 { define void @func_call_external() #3 { call void @external.func() ret void } -; HSA: define void @func_call_defined() #17 { +; HSA: define void @func_call_defined() #16 { define void @func_call_defined() #3 { call void @defined.func() ret void } -; HSA: define void @func_call_asm() #18 { +; HSA: define void @func_call_asm() #16 { define void @func_call_asm() #3 { call void asm sideeffect "", ""() #3 ret void } -; HSA: define amdgpu_kernel void @kern_call_external() #19 { +; HSA: define amdgpu_kernel void @kern_call_external() #17 { define amdgpu_kernel void @kern_call_external() #3 { call void @external.func() ret void } -; HSA: define amdgpu_kernel void @func_kern_defined() #19 { +; HSA: define amdgpu_kernel void @func_kern_defined() #17 { define amdgpu_kernel void @func_kern_defined() #3 { call void @defined.func() ret void @@ -301,22 +301,20 @@ attributes #2 = { nounwind "target-cpu"="gfx900" } attributes #3 = { nounwind } ; HSA: attributes #0 = { nounwind readnone speculatable } -; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" } +; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" } +; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" } +; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" } +; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" } +; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" } +; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" } +; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" } +; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" } ; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" } -; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #11 = { nounwind "target-cpu"="fiji" } +; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" } +; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" } +; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" } ; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" } -; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" } -; HSA: attributes #18 = { nounwind } -; HSA: attributes #19 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" } +; HSA: attributes #16 = { nounwind } +; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll deleted file mode 100644 index 51002e8aee1..00000000000 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s - -; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false - -; CHECK: define void @foo() #[[FOO:[0-9]+]] { -define void @foo() #0 { - ret void -} - -; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] { -define amdgpu_kernel void @kernel1() #1 { - call void @foo() - ret void -} - -attributes #0 = { "uniform-work-group-size"="true" } - -; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll deleted file mode 100644 index 83761148043..00000000000 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s - -; Test to verify if the attribute gets propagated across nested function calls - -; CHECK: define void @func1() #[[FUNC:[0-9]+]] { -define void @func1() #0 { - ret void -} - -; CHECK: define void @func2() #[[FUNC]] { -define void @func2() #1 { - call void @func1() - ret void -} - -; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] { -define amdgpu_kernel void @kernel3() #2 { - call void @func2() - ret void -} - -attributes #2 = { "uniform-work-group-size"="true" } - -; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll deleted file mode 100644 index 4a332f66321..00000000000 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s - -; Two kernels with different values of the uniform-work-group-attribute call the same function - -; CHECK: define void @func() #[[FUNC:[0-9]+]] { -define void @func() #0 { - ret void -} - -; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] { -define amdgpu_kernel void @kernel1() #1 { - call void @func() - ret void -} - -; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] { -define amdgpu_kernel void @kernel2() #2 { - call void @func() - ret void -} - -attributes #1 = { "uniform-work-group-size"="true" } - -; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" } -; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll deleted file mode 100644 index 15131a4e31e..00000000000 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s - -; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it -; CHECK: define void @func() #[[FUNC:[0-9]+]] { -define void @func() #0 { - ret void -} - -; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] { -define amdgpu_kernel void @kernel1() #1 { - call void @func() - ret void -} - -; External declaration of a function -; CHECK: define weak_odr void @weak_func() #[[FUNC]] { -define weak_odr void @weak_func() #0 { - ret void -} - -; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] { -define amdgpu_kernel void @kernel2() #2 { - call void @weak_func() - ret void -} - -attributes #0 = { nounwind } -attributes #1 = { "uniform-work-group-size"="false" } -attributes #2 = { "uniform-work-group-size"="true" } - -; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" } -; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" } -; CHECK: attributes #[[KERNEL2]] = { "uniform-work-group-size"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll deleted file mode 100644 index 9d07a887aa1..00000000000 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s - -; Test to ensure recursive functions exhibit proper behaviour -; Test to generate fibonacci numbers - -; CHECK: define i32 @fib(i32 %n) #[[FIB:[0-9]+]] { -define i32 @fib(i32 %n) #0 { - %cmp1 = icmp eq i32 %n, 0 - br i1 %cmp1, label %exit, label %cont1 - -cont1: - %cmp2 = icmp eq i32 %n, 1 - br i1 %cmp2, label %exit, label %cont2 - -cont2: - %nm1 = sub i32 %n, 1 - %fibm1 = call i32 @fib(i32 %nm1) - %nm2 = sub i32 %n, 2 - %fibm2 = call i32 @fib(i32 %nm2) - %retval = add i32 %fibm1, %fibm2 - - ret i32 %retval - -exit: - ret i32 1 -} - -; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[FIB]] { -define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 { - %r = call i32 @fib(i32 5) - store i32 %r, i32 addrspace(1)* %m - ret void -} - -attributes #1 = { "uniform-work-group-size"="true" } - -; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll deleted file mode 100644 index 28ed8141f0d..00000000000 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s - -; CHECK: define void @func1() #[[FUNC:[0-9]+]] { -define void @func1() #0 { - ret void -} - -; CHECK: define void @func4() #[[FUNC]] { -define void @func4() #1 { - ret void -} - -; CHECK: define void @func2() #[[FUNC]] { -define void @func2() #1 { - call void @func4() - call void @func1() - ret void -} - -; CHECK: define void @func3() #[[FUNC]] { -define void @func3() #1 { - call void @func1() - ret void -} - -; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC]] { -define amdgpu_kernel void @kernel3() #2 { - call void @func2() - call void @func3() - ret void -} - -attributes #2 = { "uniform-work-group-size"="true" } - -; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" } |