summary | refs | log | tree | commit | diff | stats
path: root/mlir/lib/Conversion/GPUToCUDA
diff options
context:
space:
mode:
author: George Karpenkov <cheshire@google.com> 2019-09-19 14:33:54 -0700
committer: A. Unique TensorFlower <gardener@tensorflow.org> 2019-09-19 14:34:30 -0700
commit: 2df646bef6e7665fdb8523613d82e7d4a5013217 (patch)
tree: 3bcbbee989db665c23fa9d9fef9131cfaf549e85 /mlir/lib/Conversion/GPUToCUDA
parent: c8961d408ee0f48e3156e3c4248bba9a43579f1f (diff)
download: bcm5719-llvm-2df646bef6e7665fdb8523613d82e7d4a5013217.tar.gz
download: bcm5719-llvm-2df646bef6e7665fdb8523613d82e7d4a5013217.zip
Automated rollback of commit 5684a12434f923d03b6870f2aa16226bfb0b38b6
PiperOrigin-RevId: 270126672
Diffstat (limited to 'mlir/lib/Conversion/GPUToCUDA')
-rw-r--r-- mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp | 47
-rw-r--r-- mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp | 54
2 files changed, 39 insertions, 62 deletions
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
index aa1711e3f8e..a69fe81b0d3 100644
--- a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
@@ -49,37 +49,26 @@ namespace {
// TODO(herhut): Move to shared location.
static constexpr const char *kCubinAnnotation = "nvvm.cubin";
-/// A pass converting tagged kernel modules to cubin blobs.
-///
-/// If tagged as a kernel module, each contained function is translated to NVVM
-/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to
-/// GPU binary code, which is then attached as an attribute to the function. The
-/// function body is erased.
+/// A pass converting tagged kernel functions to cubin blobs.
class GpuKernelToCubinPass : public ModulePass<GpuKernelToCubinPass> {
public:
GpuKernelToCubinPass(
CubinGenerator cubinGenerator = compilePtxToCubinForTesting)
: cubinGenerator(cubinGenerator) {}
+ // Run the dialect converter on the module.
void runOnModule() override {
- if (!getModule().getAttrOfType<UnitAttr>(
- gpu::GPUDialect::getKernelModuleAttrName()))
- return;
-
// Make sure the NVPTX target is initialized.
LLVMInitializeNVPTXTarget();
LLVMInitializeNVPTXTargetInfo();
LLVMInitializeNVPTXTargetMC();
LLVMInitializeNVPTXAsmPrinter();
- auto llvmModule = translateModuleToNVVMIR(getModule());
- if (!llvmModule)
- return signalPassFailure();
-
for (auto function : getModule().getOps<FuncOp>()) {
- if (!gpu::GPUDialect::isKernel(function))
+ if (!gpu::GPUDialect::isKernel(function) || function.isExternal()) {
continue;
- if (failed(translateGpuKernelToCubinAnnotation(*llvmModule, function)))
+ }
+ if (failed(translateGpuKernelToCubinAnnotation(function)))
signalPassFailure();
}
}
@@ -90,13 +79,8 @@ private:
std::string translateModuleToPtx(llvm::Module &module,
llvm::TargetMachine &target_machine);
-
- /// Converts llvmModule to cubin using the user-provded generator.
OwnedCubin convertModuleToCubin(llvm::Module &llvmModule, FuncOp &function);
-
- /// Translates llvmModule to cubin and assigns it to attribute of function.
- LogicalResult translateGpuKernelToCubinAnnotation(llvm::Module &llvmModule,
- FuncOp &function);
+ LogicalResult translateGpuKernelToCubinAnnotation(FuncOp &function);
CubinGenerator cubinGenerator;
};
@@ -151,13 +135,22 @@ OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
return cubinGenerator(ptx, function);
}
-LogicalResult GpuKernelToCubinPass::translateGpuKernelToCubinAnnotation(
- llvm::Module &llvmModule, FuncOp &function) {
- auto cubin = convertModuleToCubin(llvmModule, function);
- if (!cubin)
+LogicalResult
+GpuKernelToCubinPass::translateGpuKernelToCubinAnnotation(FuncOp &function) {
+ Builder builder(function.getContext());
+
+ OwningModuleRef module = ModuleOp::create(function.getLoc());
+
+ // TODO(herhut): Also handle called functions.
+ module->push_back(function.clone());
+
+ auto llvmModule = translateModuleToNVVMIR(*module);
+ auto cubin = convertModuleToCubin(*llvmModule, function);
+
+ if (!cubin) {
return function.emitError("translation to CUDA binary failed.");
+ }
- Builder builder(function.getContext());
function.setAttr(kCubinAnnotation,
builder.getStringAttr({cubin->data(), cubin->size()}));
diff --git a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp
index 83c3538324b..f8c6f5d15ff 100644
--- a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp
@@ -43,15 +43,8 @@ constexpr const char *kCubinGetterAnnotation = "nvvm.cubingetter";
constexpr const char *kCubinGetterSuffix = "_cubin";
constexpr const char *kCubinStorageSuffix = "_cubin_cst";
-/// A pass which moves cubin from function attributes in nested modules
-/// to global strings and generates getter functions.
-///
-/// The GpuKernelToCubinPass annotates kernels functions with compiled device
-/// code blobs. These functions reside in nested modules generated by
-/// GpuKernelOutliningPass. This pass consumes these modules and moves the cubin
-/// blobs back to the parent module as global strings and generates accessor
-/// functions for them. The external kernel functions (also generated by the
-/// outlining pass) are annotated with the symbol of the cubin accessor.
+/// A pass generating global strings and getter functions for all cubin blobs
+/// annotated on functions via the nvvm.cubin attribute.
class GpuGenerateCubinAccessorsPass
: public ModulePass<GpuGenerateCubinAccessorsPass> {
private:
@@ -62,25 +55,18 @@ private:
}
// Inserts a global constant string containing `blob` into the parent module
- // of `kernelFunc` and generates the function that returns the address of the
- // first character of this string.
+ // of `orig` and generates the function that returns the address of the first
+ // character of this string.
// TODO(herhut): consider fusing this pass with launch-func-to-cuda.
- void generate(FuncOp kernelFunc, StringAttr blob) {
- auto stubFunc = getModule().lookupSymbol<FuncOp>(kernelFunc.getName());
- if (!stubFunc) {
- kernelFunc.emitError(
- "corresponding external function not found in parent module");
- return signalPassFailure();
- }
-
- Location loc = stubFunc.getLoc();
- SmallString<128> nameBuffer(stubFunc.getName());
- auto module = stubFunc.getParentOfType<ModuleOp>();
+ void generate(FuncOp orig, StringAttr blob) {
+ Location loc = orig.getLoc();
+ SmallString<128> nameBuffer(orig.getName());
+ auto module = orig.getParentOfType<ModuleOp>();
assert(module && "function must belong to a module");
// Insert the getter function just after the original function.
OpBuilder moduleBuilder(module.getBody(), module.getBody()->begin());
- moduleBuilder.setInsertionPoint(stubFunc.getOperation()->getNextNode());
+ moduleBuilder.setInsertionPoint(orig.getOperation()->getNextNode());
auto getterType = moduleBuilder.getFunctionType(
llvm::None, LLVM::LLVMType::getInt8PtrTy(llvmDialect));
nameBuffer.append(kCubinGetterSuffix);
@@ -89,7 +75,7 @@ private:
Block *entryBlock = result.addEntryBlock();
// Drop the getter suffix before appending the storage suffix.
- nameBuffer.resize(stubFunc.getName().size());
+ nameBuffer.resize(orig.getName().size());
nameBuffer.append(kCubinStorageSuffix);
// Obtain the address of the first character of the global string containing
@@ -100,23 +86,21 @@ private:
builder.create<LLVM::ReturnOp>(loc, startPtr);
// Store the name of the getter on the function for easier lookup.
- stubFunc.setAttr(kCubinGetterAnnotation, builder.getSymbolRefAttr(result));
+ orig.setAttr(kCubinGetterAnnotation, builder.getSymbolRefAttr(result));
}
public:
+ // Perform the conversion on the module. This may insert globals, so it
+ // cannot be done on multiple functions in parallel.
void runOnModule() override {
- llvmDialect = getContext().getRegisteredDialect<LLVM::LLVMDialect>();
+ llvmDialect =
+ getModule().getContext()->getRegisteredDialect<LLVM::LLVMDialect>();
- auto modules = getModule().getOps<ModuleOp>();
- for (auto module : llvm::make_early_inc_range(modules)) {
- if (!module.getAttrOfType<UnitAttr>(
- gpu::GPUDialect::getKernelModuleAttrName()))
+ for (auto func : getModule().getOps<FuncOp>()) {
+ StringAttr cubinBlob = func.getAttrOfType<StringAttr>(kCubinAnnotation);
+ if (!cubinBlob)
continue;
- for (auto func : module.getOps<FuncOp>()) {
- if (StringAttr blob = func.getAttrOfType<StringAttr>(kCubinAnnotation))
- generate(func, blob);
- }
- module.erase();
+ generate(func, cubinBlob);
}
}
OpenPOWER on IntegriCloud