diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-27 19:25:40 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-27 19:25:40 +0000 |
commit | eac783a900d7ed5643ea30038df36684a6e46654 (patch) | |
tree | 37029034093520d00551ca030a53a1cc286b666a /clang/lib/CodeGen | |
parent | 9fac4a5d3522682e3799f407ff783cad1cde0e7e (diff) | |
download | bcm5719-llvm-eac783a900d7ed5643ea30038df36684a6e46654.tar.gz bcm5719-llvm-eac783a900d7ed5643ea30038df36684a6e46654.zip |
AMDGPU: Always emit amdgpu-flat-work-group-size

The backend default maximum should be the hardware maximum, so the
frontend should set the implementation-defined default maximum.

llvm-svn: 370101
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/TargetInfo.cpp | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 11241544477..231a20c68f5 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7912,8 +7912,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
   const auto *ReqdWGS = M.getLangOpts().OpenCL ?
     FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
 
-  if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
-       (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+
+  const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
+                              FD->hasAttr<OpenCLKernelAttr>();
+  if ((IsOpenCLKernel ||
+       (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
     F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
 
@@ -7939,6 +7942,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
     } else
       assert(Max == 0 && "Max must be zero");
+  } else if (IsOpenCLKernel) {
+    // By default, restrict the maximum size to 256.
+    F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
   }
 
   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
```