diff options
| author | Alex Zinenko <zinenko@google.com> | 2019-10-10 01:33:33 -0700 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-10-10 01:34:06 -0700 |
| commit | 5e7959a3531c8019052bae3a84a42a67c5857bc9 (patch) | |
| tree | 1eb248dba17a3c4bfd2f9c815865254681ca78c8 /mlir/lib/Conversion/GPUToCUDA | |
| parent | 309b4556d00f531988f34930eedb546512ee619f (diff) | |
| download | bcm5719-llvm-5e7959a3531c8019052bae3a84a42a67c5857bc9.tar.gz bcm5719-llvm-5e7959a3531c8019052bae3a84a42a67c5857bc9.zip | |
Use llvm.func to define functions with wrapped LLVM IR function type
This function-like operation allows one to define functions that have wrapped
LLVM IR function type, in particular variadic functions. The operation was
added in parallel to the existing lowering flow; this commit only switches the
flow to use it.
Using a custom function type makes the LLVM IR dialect type system more
consistent and avoids complex conversion rules for functions that previously
had to use the built-in function type instead of a wrapped LLVM IR dialect type
and perform conversions during the analysis.
PiperOrigin-RevId: 273910855
Diffstat (limited to 'mlir/lib/Conversion/GPUToCUDA')
| -rw-r--r-- | mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp | 116 |
1 file changed, 63 insertions, 53 deletions
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp index 450269dbdac..d9332428425 100644 --- a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp +++ b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp @@ -80,6 +80,7 @@ private: void initializeCachedTypes() { const llvm::Module &module = llvmDialect->getLLVMModule(); + llvmVoidType = LLVM::LLVMType::getVoidTy(llvmDialect); llvmPointerType = LLVM::LLVMType::getInt8PtrTy(llvmDialect); llvmPointerPointerType = llvmPointerType.getPointerTo(); llvmInt8Type = LLVM::LLVMType::getInt8Ty(llvmDialect); @@ -89,6 +90,8 @@ private: llvmDialect, module.getDataLayout().getPointerSizeInBits()); } + LLVM::LLVMType getVoidType() { return llvmVoidType; } + LLVM::LLVMType getPointerType() { return llvmPointerType; } LLVM::LLVMType getPointerPointerType() { return llvmPointerPointerType; } @@ -120,7 +123,7 @@ private: void declareCudaFunctions(Location loc); Value *setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder); - Value *generateKernelNameConstant(StringRef name, Location &loc, + Value *generateKernelNameConstant(StringRef name, Location loc, OpBuilder &builder); void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp); @@ -145,6 +148,7 @@ public: private: LLVM::LLVMDialect *llvmDialect; + LLVM::LLVMType llvmVoidType; LLVM::LLVMType llvmPointerType; LLVM::LLVMType llvmPointerPointerType; LLVM::LLVMType llvmInt8Type; @@ -160,38 +164,41 @@ private: // uses void pointers. This is fine as they have the same linkage in C. 
void GpuLaunchFuncToCudaCallsPass::declareCudaFunctions(Location loc) { ModuleOp module = getModule(); - Builder builder(module); - if (!module.lookupSymbol<FuncOp>(cuModuleLoadName)) { - module.push_back( - FuncOp::create(loc, cuModuleLoadName, - builder.getFunctionType( - { - getPointerPointerType(), /* CUmodule *module */ - getPointerType() /* void *cubin */ - }, - getCUResultType()))); + OpBuilder builder(module.getBody()->getTerminator()); + if (!module.lookupSymbol(cuModuleLoadName)) { + builder.create<LLVM::LLVMFuncOp>( + loc, cuModuleLoadName, + LLVM::LLVMType::getFunctionTy( + getCUResultType(), + { + getPointerPointerType(), /* CUmodule *module */ + getPointerType() /* void *cubin */ + }, + /*isVarArg=*/false)); } - if (!module.lookupSymbol<FuncOp>(cuModuleGetFunctionName)) { + if (!module.lookupSymbol(cuModuleGetFunctionName)) { // The helper uses void* instead of CUDA's opaque CUmodule and // CUfunction. - module.push_back( - FuncOp::create(loc, cuModuleGetFunctionName, - builder.getFunctionType( - { - getPointerPointerType(), /* void **function */ - getPointerType(), /* void *module */ - getPointerType() /* char *name */ - }, - getCUResultType()))); + builder.create<LLVM::LLVMFuncOp>( + loc, cuModuleGetFunctionName, + LLVM::LLVMType::getFunctionTy( + getCUResultType(), + { + getPointerPointerType(), /* void **function */ + getPointerType(), /* void *module */ + getPointerType() /* char *name */ + }, + /*isVarArg=*/false)); } - if (!module.lookupSymbol<FuncOp>(cuLaunchKernelName)) { + if (!module.lookupSymbol(cuLaunchKernelName)) { // Other than the CUDA api, the wrappers use uintptr_t to match the // LLVM type if MLIR's index type, which the GPU dialect uses. // Furthermore, they use void* instead of CUDA's opaque CUfunction and // CUstream. 
- module.push_back(FuncOp::create( + builder.create<LLVM::LLVMFuncOp>( loc, cuLaunchKernelName, - builder.getFunctionType( + LLVM::LLVMType::getFunctionTy( + getCUResultType(), { getPointerType(), /* void* f */ getIntPtrType(), /* intptr_t gridXDim */ @@ -205,32 +212,31 @@ void GpuLaunchFuncToCudaCallsPass::declareCudaFunctions(Location loc) { getPointerPointerType(), /* void **kernelParams */ getPointerPointerType() /* void **extra */ }, - getCUResultType()))); + /*isVarArg=*/false)); } - if (!module.lookupSymbol<FuncOp>(cuGetStreamHelperName)) { + if (!module.lookupSymbol(cuGetStreamHelperName)) { // Helper function to get the current CUDA stream. Uses void* instead of // CUDAs opaque CUstream. - module.push_back(FuncOp::create( + builder.create<LLVM::LLVMFuncOp>( loc, cuGetStreamHelperName, - builder.getFunctionType({}, getPointerType() /* void *stream */))); + LLVM::LLVMType::getFunctionTy(getPointerType(), /*isVarArg=*/false)); } - if (!module.lookupSymbol<FuncOp>(cuStreamSynchronizeName)) { - module.push_back( - FuncOp::create(loc, cuStreamSynchronizeName, - builder.getFunctionType( - { - getPointerType() /* CUstream stream */ - }, - getCUResultType()))); + if (!module.lookupSymbol(cuStreamSynchronizeName)) { + builder.create<LLVM::LLVMFuncOp>( + loc, cuStreamSynchronizeName, + LLVM::LLVMType::getFunctionTy(getCUResultType(), + getPointerType() /* CUstream stream */, + /*isVarArg=*/false)); } - if (!module.lookupSymbol<FuncOp>(kMcuMemHostRegisterPtr)) { - module.push_back(FuncOp::create(loc, kMcuMemHostRegisterPtr, - builder.getFunctionType( - { - getPointerType(), /* void *ptr */ - getInt32Type() /* int32 flags*/ - }, - {}))); + if (!module.lookupSymbol(kMcuMemHostRegisterPtr)) { + builder.create<LLVM::LLVMFuncOp>( + loc, kMcuMemHostRegisterPtr, + LLVM::LLVMType::getFunctionTy(getVoidType(), + { + getPointerType(), /* void *ptr */ + getInt32Type() /* int32 flags*/ + }, + /*isVarArg=*/false)); } } @@ -271,7 +277,7 @@ 
GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, // the descriptor pointer is registered via @mcuMemHostRegisterPtr if (llvmType.isStructTy()) { auto registerFunc = - getModule().lookupSymbol<FuncOp>(kMcuMemHostRegisterPtr); + getModule().lookupSymbol<LLVM::LLVMFuncOp>(kMcuMemHostRegisterPtr); auto zero = builder.create<LLVM::ConstantOp>( loc, getInt32Type(), builder.getI32IntegerAttr(0)); builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{}, @@ -304,7 +310,7 @@ GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, // %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*"> // } Value *GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant( - StringRef name, Location &loc, OpBuilder &builder) { + StringRef name, Location loc, OpBuilder &builder) { // Make sure the trailing zero is included in the constant. std::vector<char> kernelName(name.begin(), name.end()); kernelName.push_back('\0'); @@ -355,6 +361,7 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( << "missing " << kCubinAnnotation << " attribute"; return signalPassFailure(); } + assert(kernelModule.getName() && "expected a named module"); SmallString<128> nameBuffer(*kernelModule.getName()); nameBuffer.append(kCubinStorageSuffix); @@ -364,7 +371,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( // Emit the load module call to load the module data. Error checking is done // in the called helper function. 
auto cuModule = allocatePointer(builder, loc); - FuncOp cuModuleLoad = getModule().lookupSymbol<FuncOp>(cuModuleLoadName); + auto cuModuleLoad = + getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleLoadName); builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getCUResultType()}, builder.getSymbolRefAttr(cuModuleLoad), ArrayRef<Value *>{cuModule, data}); @@ -374,20 +382,21 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( builder.create<LLVM::LoadOp>(loc, getPointerType(), cuModule); auto kernelName = generateKernelNameConstant(launchOp.kernel(), loc, builder); auto cuFunction = allocatePointer(builder, loc); - FuncOp cuModuleGetFunction = - getModule().lookupSymbol<FuncOp>(cuModuleGetFunctionName); + auto cuModuleGetFunction = + getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleGetFunctionName); builder.create<LLVM::CallOp>( loc, ArrayRef<Type>{getCUResultType()}, builder.getSymbolRefAttr(cuModuleGetFunction), ArrayRef<Value *>{cuFunction, cuOwningModuleRef, kernelName}); // Grab the global stream needed for execution. - FuncOp cuGetStreamHelper = - getModule().lookupSymbol<FuncOp>(cuGetStreamHelperName); + auto cuGetStreamHelper = + getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuGetStreamHelperName); auto cuStream = builder.create<LLVM::CallOp>( loc, ArrayRef<Type>{getPointerType()}, builder.getSymbolRefAttr(cuGetStreamHelper), ArrayRef<Value *>{}); // Invoke the function with required arguments. - auto cuLaunchKernel = getModule().lookupSymbol<FuncOp>(cuLaunchKernelName); + auto cuLaunchKernel = + getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuLaunchKernelName); auto cuFunctionRef = builder.create<LLVM::LoadOp>(loc, getPointerType(), cuFunction); auto paramsArray = setupParamsArray(launchOp, builder); @@ -404,7 +413,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( paramsArray, /* kernel params */ nullpointer /* extra */}); // Sync on the stream to make it synchronous. 
- auto cuStreamSync = getModule().lookupSymbol<FuncOp>(cuStreamSynchronizeName); + auto cuStreamSync = + getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuStreamSynchronizeName); builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getCUResultType()}, builder.getSymbolRefAttr(cuStreamSync), ArrayRef<Value *>(cuStream.getResult(0))); |

