summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Aakanksha Patil <aakanksha555@gmail.com> 2018-12-13 21:23:12 +0000
committer Aakanksha Patil <aakanksha555@gmail.com> 2018-12-13 21:23:12 +0000
commit bc568766b213b718b45ca113011419cc07bed8ec (patch)
tree 3328fb1755da87d091b70cec49c3b00278c36e81
parent 55fa567bb70b3d2c7f4771df3e30fa9dc930571d (diff)
download bcm5719-llvm-bc568766b213b718b45ca113011419cc07bed8ec.tar.gz
bcm5719-llvm-bc568766b213b718b45ca113011419cc07bed8ec.zip
Revert r348971: [AMDGPU] Support for "uniform-work-group-size" attribute
This patch breaks RADV (and probably RadeonSI as well).
llvm-svn: 349084
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp 68
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll 50
-rw-r--r-- llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll 18
-rw-r--r-- llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll 24
-rw-r--r-- llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll 25
-rw-r--r-- llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll 33
-rw-r--r-- llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll 37
-rw-r--r-- llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll 35
9 files changed, 31 insertions, 263 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 4540d51532e..896ac9c8777 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -46,11 +46,8 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
- SmallVector<CallGraphNode*, 8> NodeList;
bool addFeatureAttributes(Function &F);
- bool processUniformWorkGroupAttribute();
- bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
public:
static char ID;
@@ -189,6 +186,7 @@ static bool handleAttr(Function &Parent, const Function &Callee,
Parent.addFnAttr(Name);
return true;
}
+
return false;
}
@@ -215,56 +213,6 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
handleAttr(Parent, Callee, AttrName);
}
-bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
- bool Changed = false;
-
- for (auto *Node : reverse(NodeList)) {
- Function *Caller = Node->getFunction();
-
- for (auto I : *Node) {
- Function *Callee = std::get<1>(I)->getFunction();
- if (Callee)
- Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
- }
- }
-
- return Changed;
-}
-
-bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
- Function &Caller, Function &Callee) {
-
- // Check for externally defined function
- if (!Callee.hasExactDefinition()) {
- Callee.addFnAttr("uniform-work-group-size", "false");
- if (!Caller.hasFnAttribute("uniform-work-group-size"))
- Caller.addFnAttr("uniform-work-group-size", "false");
-
- return true;
- }
- // Check if the Caller has the attribute
- if (Caller.hasFnAttribute("uniform-work-group-size")) {
- // Check if the value of the attribute is true
- if (Caller.getFnAttribute("uniform-work-group-size")
- .getValueAsString().equals("true")) {
- // Propagate the attribute to the Callee, if it does not have it
- if (!Callee.hasFnAttribute("uniform-work-group-size")) {
- Callee.addFnAttr("uniform-work-group-size", "true");
- return true;
- }
- } else {
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- } else {
- // If the attribute is absent, set it as false
- Caller.addFnAttr("uniform-work-group-size", "false");
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- return false;
-}
-
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
bool HasFlat = ST.hasFlatAddressSpace();
@@ -345,19 +293,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
}
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
+ Module &M = SCC.getCallGraph().getModule();
+ Triple TT(M.getTargetTriple());
+
bool Changed = false;
-
for (CallGraphNode *I : SCC) {
- // Build a list of CallGraphNodes from most number of uses to least
- if (I->getNumReferences())
- NodeList.push_back(I);
- else
- processUniformWorkGroupAttribute();
-
- Function *F = I->getFunction();
- // Add feature attributes
+ Function *F = I->getFunction();
if (!F || F->isDeclaration())
continue;
+
Changed |= addFeatureAttributes(*F);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ef3540b499a..70d365f4ad7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -683,9 +683,6 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- if (TM->getTargetTriple().getArch() == Triple::amdgcn)
- addPass(createAMDGPUAnnotateKernelFeaturesPass());
-
if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
EnableLowerKernelArguments)
addPass(createAMDGPULowerKernelArgumentsPass());
@@ -773,6 +770,7 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
+ addPass(createAMDGPUAnnotateKernelFeaturesPass());
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index ca6739f5d37..e68b1794f28 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -244,52 +244,52 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
ret void
}
-; HSA: define void @use_implicitarg_ptr() #16 {
+; HSA: define void @use_implicitarg_ptr() #15 {
define void @use_implicitarg_ptr() #1 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
-; HSA: define void @func_indirect_use_implicitarg_ptr() #16 {
+; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
define void @func_indirect_use_implicitarg_ptr() #1 {
call void @use_implicitarg_ptr()
ret void
}
-; HSA: declare void @external.func() #17
+; HSA: declare void @external.func() #16
declare void @external.func() #3
-; HSA: define internal void @defined.func() #17 {
+; HSA: define internal void @defined.func() #16 {
define internal void @defined.func() #3 {
ret void
}
-; HSA: define void @func_call_external() #17 {
+; HSA: define void @func_call_external() #16 {
define void @func_call_external() #3 {
call void @external.func()
ret void
}
-; HSA: define void @func_call_defined() #17 {
+; HSA: define void @func_call_defined() #16 {
define void @func_call_defined() #3 {
call void @defined.func()
ret void
}
-; HSA: define void @func_call_asm() #18 {
+; HSA: define void @func_call_asm() #16 {
define void @func_call_asm() #3 {
call void asm sideeffect "", ""() #3
ret void
}
-; HSA: define amdgpu_kernel void @kern_call_external() #19 {
+; HSA: define amdgpu_kernel void @kern_call_external() #17 {
define amdgpu_kernel void @kern_call_external() #3 {
call void @external.func()
ret void
}
-; HSA: define amdgpu_kernel void @func_kern_defined() #19 {
+; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
define amdgpu_kernel void @func_kern_defined() #3 {
call void @defined.func()
ret void
@@ -301,22 +301,20 @@ attributes #2 = { nounwind "target-cpu"="gfx900" }
attributes #3 = { nounwind }
; HSA: attributes #0 = { nounwind readnone speculatable }
-; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
+; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" }
+; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" }
+; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" }
+; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" }
+; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" }
+; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" }
+; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" }
+; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" }
; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
-; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #11 = { nounwind "target-cpu"="fiji" }
+; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
+; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
+; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
-; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
-; HSA: attributes #18 = { nounwind }
-; HSA: attributes #19 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" }
+; HSA: attributes #16 = { nounwind }
+; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
deleted file mode 100644
index 51002e8aee1..00000000000
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
-
-; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
-
-; CHECK: define void @foo() #[[FOO:[0-9]+]] {
-define void @foo() #0 {
- ret void
-}
-
-; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] {
-define amdgpu_kernel void @kernel1() #1 {
- call void @foo()
- ret void
-}
-
-attributes #0 = { "uniform-work-group-size"="true" }
-
-; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
deleted file mode 100644
index 83761148043..00000000000
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
-
-; Test to verify if the attribute gets propagated across nested function calls
-
-; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
-define void @func1() #0 {
- ret void
-}
-
-; CHECK: define void @func2() #[[FUNC]] {
-define void @func2() #1 {
- call void @func1()
- ret void
-}
-
-; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
-define amdgpu_kernel void @kernel3() #2 {
- call void @func2()
- ret void
-}
-
-attributes #2 = { "uniform-work-group-size"="true" }
-
-; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
deleted file mode 100644
index 4a332f66321..00000000000
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
-
-; Two kernels with different values of the uniform-work-group-attribute call the same function
-
-; CHECK: define void @func() #[[FUNC:[0-9]+]] {
-define void @func() #0 {
- ret void
-}
-
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
-define amdgpu_kernel void @kernel1() #1 {
- call void @func()
- ret void
-}
-
-; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] {
-define amdgpu_kernel void @kernel2() #2 {
- call void @func()
- ret void
-}
-
-attributes #1 = { "uniform-work-group-size"="true" }
-
-; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
deleted file mode 100644
index 15131a4e31e..00000000000
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
-
-; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it
-; CHECK: define void @func() #[[FUNC:[0-9]+]] {
-define void @func() #0 {
- ret void
-}
-
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
-define amdgpu_kernel void @kernel1() #1 {
- call void @func()
- ret void
-}
-
-; External declaration of a function
-; CHECK: define weak_odr void @weak_func() #[[FUNC]] {
-define weak_odr void @weak_func() #0 {
- ret void
-}
-
-; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
-define amdgpu_kernel void @kernel2() #2 {
- call void @weak_func()
- ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { "uniform-work-group-size"="false" }
-attributes #2 = { "uniform-work-group-size"="true" }
-
-; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL2]] = { "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
deleted file mode 100644
index 9d07a887aa1..00000000000
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
-
-; Test to ensure recursive functions exhibit proper behaviour
-; Test to generate fibonacci numbers
-
-; CHECK: define i32 @fib(i32 %n) #[[FIB:[0-9]+]] {
-define i32 @fib(i32 %n) #0 {
- %cmp1 = icmp eq i32 %n, 0
- br i1 %cmp1, label %exit, label %cont1
-
-cont1:
- %cmp2 = icmp eq i32 %n, 1
- br i1 %cmp2, label %exit, label %cont2
-
-cont2:
- %nm1 = sub i32 %n, 1
- %fibm1 = call i32 @fib(i32 %nm1)
- %nm2 = sub i32 %n, 2
- %fibm2 = call i32 @fib(i32 %nm2)
- %retval = add i32 %fibm1, %fibm2
-
- ret i32 %retval
-
-exit:
- ret i32 1
-}
-
-; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[FIB]] {
-define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
- %r = call i32 @fib(i32 5)
- store i32 %r, i32 addrspace(1)* %m
- ret void
-}
-
-attributes #1 = { "uniform-work-group-size"="true" }
-
-; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
deleted file mode 100644
index 28ed8141f0d..00000000000
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
-
-; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
-define void @func1() #0 {
- ret void
-}
-
-; CHECK: define void @func4() #[[FUNC]] {
-define void @func4() #1 {
- ret void
-}
-
-; CHECK: define void @func2() #[[FUNC]] {
-define void @func2() #1 {
- call void @func4()
- call void @func1()
- ret void
-}
-
-; CHECK: define void @func3() #[[FUNC]] {
-define void @func3() #1 {
- call void @func1()
- ret void
-}
-
-; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC]] {
-define amdgpu_kernel void @kernel3() #2 {
- call void @func2()
- call void @func3()
- ret void
-}
-
-attributes #2 = { "uniform-work-group-size"="true" }
-
-; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
OpenPOWER on IntegriCloud