summaryrefslogtreecommitdiffstats
path: root/mlir/lib/Conversion/GPUToCUDA
diff options
context:
space:
mode:
authorAlex Zinenko <zinenko@google.com>2019-10-10 01:33:33 -0700
committerA. Unique TensorFlower <gardener@tensorflow.org>2019-10-10 01:34:06 -0700
commit5e7959a3531c8019052bae3a84a42a67c5857bc9 (patch)
tree1eb248dba17a3c4bfd2f9c815865254681ca78c8 /mlir/lib/Conversion/GPUToCUDA
parent309b4556d00f531988f34930eedb546512ee619f (diff)
downloadbcm5719-llvm-5e7959a3531c8019052bae3a84a42a67c5857bc9.tar.gz
bcm5719-llvm-5e7959a3531c8019052bae3a84a42a67c5857bc9.zip
Use llvm.func to define functions with wrapped LLVM IR function type
This function-like operation allows one to define functions that have wrapped LLVM IR function type, in particular variadic functions. The operation was added in parallel to the existing lowering flow, this commit only switches the flow to use it. Using a custom function type makes the LLVM IR dialect type system more consistent and avoids complex conversion rules for functions that previously had to use the built-in function type instead of a wrapped LLVM IR dialect type and perform conversions during the analysis. PiperOrigin-RevId: 273910855
Diffstat (limited to 'mlir/lib/Conversion/GPUToCUDA')
-rw-r--r--mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp116
1 files changed, 63 insertions, 53 deletions
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
index 450269dbdac..d9332428425 100644
--- a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
@@ -80,6 +80,7 @@ private:
void initializeCachedTypes() {
const llvm::Module &module = llvmDialect->getLLVMModule();
+ llvmVoidType = LLVM::LLVMType::getVoidTy(llvmDialect);
llvmPointerType = LLVM::LLVMType::getInt8PtrTy(llvmDialect);
llvmPointerPointerType = llvmPointerType.getPointerTo();
llvmInt8Type = LLVM::LLVMType::getInt8Ty(llvmDialect);
@@ -89,6 +90,8 @@ private:
llvmDialect, module.getDataLayout().getPointerSizeInBits());
}
+ LLVM::LLVMType getVoidType() { return llvmVoidType; }
+
LLVM::LLVMType getPointerType() { return llvmPointerType; }
LLVM::LLVMType getPointerPointerType() { return llvmPointerPointerType; }
@@ -120,7 +123,7 @@ private:
void declareCudaFunctions(Location loc);
Value *setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder);
- Value *generateKernelNameConstant(StringRef name, Location &loc,
+ Value *generateKernelNameConstant(StringRef name, Location loc,
OpBuilder &builder);
void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp);
@@ -145,6 +148,7 @@ public:
private:
LLVM::LLVMDialect *llvmDialect;
+ LLVM::LLVMType llvmVoidType;
LLVM::LLVMType llvmPointerType;
LLVM::LLVMType llvmPointerPointerType;
LLVM::LLVMType llvmInt8Type;
@@ -160,38 +164,41 @@ private:
// uses void pointers. This is fine as they have the same linkage in C.
void GpuLaunchFuncToCudaCallsPass::declareCudaFunctions(Location loc) {
ModuleOp module = getModule();
- Builder builder(module);
- if (!module.lookupSymbol<FuncOp>(cuModuleLoadName)) {
- module.push_back(
- FuncOp::create(loc, cuModuleLoadName,
- builder.getFunctionType(
- {
- getPointerPointerType(), /* CUmodule *module */
- getPointerType() /* void *cubin */
- },
- getCUResultType())));
+ OpBuilder builder(module.getBody()->getTerminator());
+ if (!module.lookupSymbol(cuModuleLoadName)) {
+ builder.create<LLVM::LLVMFuncOp>(
+ loc, cuModuleLoadName,
+ LLVM::LLVMType::getFunctionTy(
+ getCUResultType(),
+ {
+ getPointerPointerType(), /* CUmodule *module */
+ getPointerType() /* void *cubin */
+ },
+ /*isVarArg=*/false));
}
- if (!module.lookupSymbol<FuncOp>(cuModuleGetFunctionName)) {
+ if (!module.lookupSymbol(cuModuleGetFunctionName)) {
// The helper uses void* instead of CUDA's opaque CUmodule and
// CUfunction.
- module.push_back(
- FuncOp::create(loc, cuModuleGetFunctionName,
- builder.getFunctionType(
- {
- getPointerPointerType(), /* void **function */
- getPointerType(), /* void *module */
- getPointerType() /* char *name */
- },
- getCUResultType())));
+ builder.create<LLVM::LLVMFuncOp>(
+ loc, cuModuleGetFunctionName,
+ LLVM::LLVMType::getFunctionTy(
+ getCUResultType(),
+ {
+ getPointerPointerType(), /* void **function */
+ getPointerType(), /* void *module */
+ getPointerType() /* char *name */
+ },
+ /*isVarArg=*/false));
}
- if (!module.lookupSymbol<FuncOp>(cuLaunchKernelName)) {
+ if (!module.lookupSymbol(cuLaunchKernelName)) {
// Other than the CUDA API, the wrappers use uintptr_t to match the
// LLVM type of MLIR's index type, which the GPU dialect uses.
// Furthermore, they use void* instead of CUDA's opaque CUfunction and
// CUstream.
- module.push_back(FuncOp::create(
+ builder.create<LLVM::LLVMFuncOp>(
loc, cuLaunchKernelName,
- builder.getFunctionType(
+ LLVM::LLVMType::getFunctionTy(
+ getCUResultType(),
{
getPointerType(), /* void* f */
getIntPtrType(), /* intptr_t gridXDim */
@@ -205,32 +212,31 @@ void GpuLaunchFuncToCudaCallsPass::declareCudaFunctions(Location loc) {
getPointerPointerType(), /* void **kernelParams */
getPointerPointerType() /* void **extra */
},
- getCUResultType())));
+ /*isVarArg=*/false));
}
- if (!module.lookupSymbol<FuncOp>(cuGetStreamHelperName)) {
+ if (!module.lookupSymbol(cuGetStreamHelperName)) {
// Helper function to get the current CUDA stream. Uses void* instead of
// CUDA's opaque CUstream.
- module.push_back(FuncOp::create(
+ builder.create<LLVM::LLVMFuncOp>(
loc, cuGetStreamHelperName,
- builder.getFunctionType({}, getPointerType() /* void *stream */)));
+ LLVM::LLVMType::getFunctionTy(getPointerType(), /*isVarArg=*/false));
}
- if (!module.lookupSymbol<FuncOp>(cuStreamSynchronizeName)) {
- module.push_back(
- FuncOp::create(loc, cuStreamSynchronizeName,
- builder.getFunctionType(
- {
- getPointerType() /* CUstream stream */
- },
- getCUResultType())));
+ if (!module.lookupSymbol(cuStreamSynchronizeName)) {
+ builder.create<LLVM::LLVMFuncOp>(
+ loc, cuStreamSynchronizeName,
+ LLVM::LLVMType::getFunctionTy(getCUResultType(),
+ getPointerType() /* CUstream stream */,
+ /*isVarArg=*/false));
}
- if (!module.lookupSymbol<FuncOp>(kMcuMemHostRegisterPtr)) {
- module.push_back(FuncOp::create(loc, kMcuMemHostRegisterPtr,
- builder.getFunctionType(
- {
- getPointerType(), /* void *ptr */
- getInt32Type() /* int32 flags*/
- },
- {})));
+ if (!module.lookupSymbol(kMcuMemHostRegisterPtr)) {
+ builder.create<LLVM::LLVMFuncOp>(
+ loc, kMcuMemHostRegisterPtr,
+ LLVM::LLVMType::getFunctionTy(getVoidType(),
+ {
+ getPointerType(), /* void *ptr */
+ getInt32Type() /* int32 flags*/
+ },
+ /*isVarArg=*/false));
}
}
@@ -271,7 +277,7 @@ GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp,
// the descriptor pointer is registered via @mcuMemHostRegisterPtr
if (llvmType.isStructTy()) {
auto registerFunc =
- getModule().lookupSymbol<FuncOp>(kMcuMemHostRegisterPtr);
+ getModule().lookupSymbol<LLVM::LLVMFuncOp>(kMcuMemHostRegisterPtr);
auto zero = builder.create<LLVM::ConstantOp>(
loc, getInt32Type(), builder.getI32IntegerAttr(0));
builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{},
@@ -304,7 +310,7 @@ GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp,
// %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*">
// }
Value *GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant(
- StringRef name, Location &loc, OpBuilder &builder) {
+ StringRef name, Location loc, OpBuilder &builder) {
// Make sure the trailing zero is included in the constant.
std::vector<char> kernelName(name.begin(), name.end());
kernelName.push_back('\0');
@@ -355,6 +361,7 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
<< "missing " << kCubinAnnotation << " attribute";
return signalPassFailure();
}
+
assert(kernelModule.getName() && "expected a named module");
SmallString<128> nameBuffer(*kernelModule.getName());
nameBuffer.append(kCubinStorageSuffix);
@@ -364,7 +371,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
// Emit the load module call to load the module data. Error checking is done
// in the called helper function.
auto cuModule = allocatePointer(builder, loc);
- FuncOp cuModuleLoad = getModule().lookupSymbol<FuncOp>(cuModuleLoadName);
+ auto cuModuleLoad =
+ getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleLoadName);
builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getCUResultType()},
builder.getSymbolRefAttr(cuModuleLoad),
ArrayRef<Value *>{cuModule, data});
@@ -374,20 +382,21 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
builder.create<LLVM::LoadOp>(loc, getPointerType(), cuModule);
auto kernelName = generateKernelNameConstant(launchOp.kernel(), loc, builder);
auto cuFunction = allocatePointer(builder, loc);
- FuncOp cuModuleGetFunction =
- getModule().lookupSymbol<FuncOp>(cuModuleGetFunctionName);
+ auto cuModuleGetFunction =
+ getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleGetFunctionName);
builder.create<LLVM::CallOp>(
loc, ArrayRef<Type>{getCUResultType()},
builder.getSymbolRefAttr(cuModuleGetFunction),
ArrayRef<Value *>{cuFunction, cuOwningModuleRef, kernelName});
// Grab the global stream needed for execution.
- FuncOp cuGetStreamHelper =
- getModule().lookupSymbol<FuncOp>(cuGetStreamHelperName);
+ auto cuGetStreamHelper =
+ getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuGetStreamHelperName);
auto cuStream = builder.create<LLVM::CallOp>(
loc, ArrayRef<Type>{getPointerType()},
builder.getSymbolRefAttr(cuGetStreamHelper), ArrayRef<Value *>{});
// Invoke the function with required arguments.
- auto cuLaunchKernel = getModule().lookupSymbol<FuncOp>(cuLaunchKernelName);
+ auto cuLaunchKernel =
+ getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuLaunchKernelName);
auto cuFunctionRef =
builder.create<LLVM::LoadOp>(loc, getPointerType(), cuFunction);
auto paramsArray = setupParamsArray(launchOp, builder);
@@ -404,7 +413,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
paramsArray, /* kernel params */
nullpointer /* extra */});
// Sync on the stream to make it synchronous.
- auto cuStreamSync = getModule().lookupSymbol<FuncOp>(cuStreamSynchronizeName);
+ auto cuStreamSync =
+ getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuStreamSynchronizeName);
builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getCUResultType()},
builder.getSymbolRefAttr(cuStreamSync),
ArrayRef<Value *>(cuStream.getResult(0)));
OpenPOWER on IntegriCloud