diff options
| -rw-r--r-- | clang/lib/CodeGen/TargetInfo.cpp | 3 | ||||
| -rw-r--r-- | clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu | 8 |
2 files changed, 10 insertions, 1 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index f39764d1a46..4ef57786f44 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7868,7 +7868,8 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
   const auto *ReqdWGS = M.getLangOpts().OpenCL ?
     FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;

-  if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() &&
+  if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
+       (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
     F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");

diff --git a/clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu b/clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
new file mode 100644
index 00000000000..8f730ac14c5
--- /dev/null
+++ b/clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s
+#include "Inputs/cuda.h"
+
+__global__ void hip_kernel_temp() {
+}
+
+// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="48"

