summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Yaxun Liu <Yaxun.Liu@amd.com> 2019-09-03 18:50:24 +0000
committer: Yaxun Liu <Yaxun.Liu@amd.com> 2019-09-03 18:50:24 +0000
commit: 1bea97c971d60f261f1bdfaa7b6d9cb30a6962fd (patch)
tree: 6ebff5f97e28459a528341b0ec311cec51a1bc47
parent: 0581a44e02e4d7170d3205d6109e84b101cbb71a (diff)
download: bcm5719-llvm-1bea97c971d60f261f1bdfaa7b6d9cb30a6962fd.tar.gz
          bcm5719-llvm-1bea97c971d60f261f1bdfaa7b6d9cb30a6962fd.zip
[AMDGPU] Set default flat work group size to (1,256) for HIP
Differential Revision: https://reviews.llvm.org/D67048
llvm-svn: 370808
-rw-r--r-- clang/lib/CodeGen/TargetInfo.cpp 7
-rw-r--r-- clang/test/CodeGenCUDA/kernel-amdgcn.cu 5
2 files changed, 7 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 231a20c68f5..760895a493f 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7915,8 +7915,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
FD->hasAttr<OpenCLKernelAttr>();
- if ((IsOpenCLKernel ||
- (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+ const bool IsHIPKernel = M.getLangOpts().HIP &&
+ FD->hasAttr<CUDAGlobalAttr>();
+ if ((IsOpenCLKernel || IsHIPKernel) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
@@ -7942,7 +7943,7 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
- } else if (IsOpenCLKernel) {
+ } else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to 256.
F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
}
diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
index ffa6c9549f0..135d3030480 100644
--- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
#include "Inputs/cuda.h"
// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
@@ -25,7 +25,7 @@ struct Dummy {
EmptyKernelPtr Empty() { return EmptyKernel<void>; }
};
-// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
template<class T>
__global__ void template_kernel(T x) {}
@@ -39,3 +39,4 @@ int main() {
launch((void*)D.Empty());
return 0;
}
+// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
OpenPOWER on IntegriCloud