summary refs log tree commit diff stats
path: root/clang/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r-- clang/lib/CodeGen/CGCUDANV.cpp 135
1 files changed, 60 insertions, 75 deletions
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index d24ef0a8a97..4272aef0586 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -41,10 +41,10 @@ private:
/// Keeps track of kernel launch stubs emitted in this module
llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
- /// Keeps track of variables containing handles of GPU binaries. Populated by
+ /// Keeps track of the variable containing the GPU binary handle. Populated by
/// ModuleCtorFunction() and used to create corresponding cleanup calls in
/// ModuleDtorFunction()
- llvm::SmallVector<llvm::GlobalVariable *, 16> GpuBinaryHandles;
+ llvm::GlobalVariable *GpuBinaryHandle = nullptr;
llvm::Constant *getSetupArgumentFn() const;
llvm::Constant *getLaunchFn() const;
@@ -245,16 +245,14 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
/// Creates a global constructor function for the module:
/// \code
/// void __cuda_module_ctor(void*) {
-/// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0);
-/// __cuda_register_globals(Handle0);
-/// ...
-/// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN);
-/// __cuda_register_globals(HandleN);
+/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
+/// __cuda_register_globals(Handle);
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
- // No need to generate ctors/dtors if there are no GPU binaries.
- if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+ // No need to generate ctors/dtors if there is no GPU binary.
+ std::string GpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
+ if (GpuBinaryFileName.empty())
return nullptr;
// void __cuda_register_globals(void* handle);
@@ -267,6 +265,18 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
llvm::StructType *FatbinWrapperTy =
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
+ // Register GPU binary with the CUDA runtime, store returned handle in a
+ // global variable and save a reference in GpuBinaryHandle to be cleaned up
+ // in destructor on exit. Then associate all known kernels with the GPU binary
+ // handle so CUDA runtime can figure out what to call on the GPU side.
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
+ llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
+ if (std::error_code EC = GpuBinaryOrErr.getError()) {
+ CGM.getDiags().Report(diag::err_cannot_open_file)
+ << GpuBinaryFileName << EC.message();
+ return nullptr;
+ }
+
llvm::Function *ModuleCtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
@@ -276,79 +286,56 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
CtorBuilder.SetInsertPoint(CtorEntryBB);
- // For each GPU binary, register it with the CUDA runtime and store returned
- // handle in a global variable and save the handle in GpuBinaryHandles vector
- // to be cleaned up in destructor on exit. Then associate all known kernels
- // with the GPU binary handle so CUDA runtime can figure out what to call on
- // the GPU side.
- for (const std::string &GpuBinaryFileName :
- CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
- llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
- if (std::error_code EC = GpuBinaryOrErr.getError()) {
- CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName
- << EC.message();
- continue;
- }
-
- const char *FatbinConstantName =
- CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
- // NVIDIA's cuobjdump looks for fatbins in this section.
- const char *FatbinSectionName =
- CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
-
- // Create initialized wrapper structure that points to the loaded GPU binary
- ConstantInitBuilder Builder(CGM);
- auto Values = Builder.beginStruct(FatbinWrapperTy);
- // Fatbin wrapper magic.
- Values.addInt(IntTy, 0x466243b1);
- // Fatbin version.
- Values.addInt(IntTy, 1);
- // Data.
- Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(),
- "", FatbinConstantName, 8));
- // Unused in fatbin v1.
- Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
- llvm::GlobalVariable *FatbinWrapper =
- Values.finishAndCreateGlobal("__cuda_fatbin_wrapper",
- CGM.getPointerAlign(),
- /*constant*/ true);
- FatbinWrapper->setSection(FatbinSectionName);
-
- // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
- llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
- RegisterFatbinFunc,
- CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
- llvm::GlobalVariable *GpuBinaryHandle = new llvm::GlobalVariable(
- TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
- llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
- CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
- CGM.getPointerAlign());
-
- // Call __cuda_register_globals(GpuBinaryHandle);
- if (RegisterGlobalsFunc)
- CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
-
- // Save GpuBinaryHandle so we can unregister it in destructor.
- GpuBinaryHandles.push_back(GpuBinaryHandle);
- }
+ const char *FatbinConstantName =
+ CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
+ // NVIDIA's cuobjdump looks for fatbins in this section.
+ const char *FatbinSectionName =
+ CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
+
+ // Create initialized wrapper structure that points to the loaded GPU binary
+ ConstantInitBuilder Builder(CGM);
+ auto Values = Builder.beginStruct(FatbinWrapperTy);
+ // Fatbin wrapper magic.
+ Values.addInt(IntTy, 0x466243b1);
+ // Fatbin version.
+ Values.addInt(IntTy, 1);
+ // Data.
+ Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "",
+ FatbinConstantName, 8));
+ // Unused in fatbin v1.
+ Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
+ llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
+ "__cuda_fatbin_wrapper", CGM.getPointerAlign(),
+ /*constant*/ true);
+ FatbinWrapper->setSection(FatbinSectionName);
+
+ // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
+ llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
+ RegisterFatbinFunc, CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+ GpuBinaryHandle = new llvm::GlobalVariable(
+ TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
+ CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
+ CGM.getPointerAlign());
+
+ // Call __cuda_register_globals(GpuBinaryHandle);
+ if (RegisterGlobalsFunc)
+ CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
CtorBuilder.CreateRetVoid();
return ModuleCtorFunc;
}
-/// Creates a global destructor function that unregisters all GPU code blobs
+/// Creates a global destructor function that unregisters the GPU code blob
/// registered by constructor.
/// \code
/// void __cuda_module_dtor(void*) {
-/// __cudaUnregisterFatBinary(Handle0);
-/// ...
-/// __cudaUnregisterFatBinary(HandleN);
+/// __cudaUnregisterFatBinary(Handle);
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
- // No need for destructor if we don't have handles to unregister.
- if (GpuBinaryHandles.empty())
+ // No need for destructor if we don't have a handle to unregister.
+ if (!GpuBinaryHandle)
return nullptr;
// void __cudaUnregisterFatBinary(void ** handle);
@@ -364,11 +351,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
CGBuilderTy DtorBuilder(CGM, Context);
DtorBuilder.SetInsertPoint(DtorEntryBB);
- for (llvm::GlobalVariable *GpuBinaryHandle : GpuBinaryHandles) {
- auto HandleValue =
+ auto HandleValue =
DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
- DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
- }
+ DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
DtorBuilder.CreateRetVoid();
return ModuleDtorFunc;
OpenPOWER on IntegriCloud