summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r--clang/lib/CodeGen/CGCUDANV.cpp17
1 files changed, 11 insertions, 6 deletions
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 4d4038dae9c..05aeef4194d 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
- CudaFeature::CUDA_USES_NEW_LAUNCH))
+ CudaFeature::CUDA_USES_NEW_LAUNCH) ||
+ CGF.getLangOpts().HIPUseNewLaunchAPI)
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
@@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
- // Lookup cudaLaunchKernel function.
+ // Lookup cudaLaunchKernel/hipLaunchKernel function.
// cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
// void **args, size_t sharedMem,
// cudaStream_t stream);
+ // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ // void **args, size_t sharedMem,
+ // hipStream_t stream);
TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+ auto LaunchKernelName = addPrefixToName("LaunchKernel");
IdentifierInfo &cudaLaunchKernelII =
- CGM.getContext().Idents.get("cudaLaunchKernel");
+ CGM.getContext().Idents.get(LaunchKernelName);
FunctionDecl *cudaLaunchKernelFD = nullptr;
for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
@@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
if (cudaLaunchKernelFD == nullptr) {
CGM.Error(CGF.CurFuncDecl->getLocation(),
- "Can't find declaration for cudaLaunchKernel()");
+ "Can't find declaration for " + LaunchKernelName);
return;
}
// Create temporary dim3 grid_dim, block_dim.
@@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
/*ShmemSize=*/ShmemSize.getType(),
/*Stream=*/Stream.getType()},
/*isVarArg=*/false),
- "__cudaPopCallConfiguration");
+ addUnderscoredPrefixToName("PopCallConfiguration"));
CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
{GridDim.getPointer(), BlockDim.getPointer(),
@@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
const CGFunctionInfo &FI =
CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
llvm::FunctionCallee cudaLaunchKernelFn =
- CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
+ CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
LaunchKernelArgs);
CGF.EmitBranch(EndBlock);
OpenPOWER on IntegriCloud