summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/TargetInfo.cpp10
1 files changed, 8 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 11241544477..231a20c68f5 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7912,8 +7912,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
- if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
- (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+
+ const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
+ FD->hasAttr<OpenCLKernelAttr>();
+ if ((IsOpenCLKernel ||
+ (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
@@ -7939,6 +7942,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
+ } else if (IsOpenCLKernel) {
+ // By default, restrict the maximum size to 256.
+ F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
}
if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
OpenPOWER on IntegriCloud