diff options
| author | George Karpenkov <cheshire@google.com> | 2019-09-19 14:33:54 -0700 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-09-19 14:34:30 -0700 |
| commit | 2df646bef6e7665fdb8523613d82e7d4a5013217 (patch) | |
| tree | 3bcbbee989db665c23fa9d9fef9131cfaf549e85 /mlir/lib/Conversion/GPUToCUDA | |
| parent | c8961d408ee0f48e3156e3c4248bba9a43579f1f (diff) | |
| download | bcm5719-llvm-2df646bef6e7665fdb8523613d82e7d4a5013217.tar.gz bcm5719-llvm-2df646bef6e7665fdb8523613d82e7d4a5013217.zip | |
Automated rollback of commit 5684a12434f923d03b6870f2aa16226bfb0b38b6
PiperOrigin-RevId: 270126672
Diffstat (limited to 'mlir/lib/Conversion/GPUToCUDA')
| -rw-r--r-- | mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp | 47 | ||||
| -rw-r--r-- | mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp | 54 |
2 files changed, 39 insertions, 62 deletions
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp index aa1711e3f8e..a69fe81b0d3 100644 --- a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp +++ b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp @@ -49,37 +49,26 @@ namespace { // TODO(herhut): Move to shared location. static constexpr const char *kCubinAnnotation = "nvvm.cubin"; -/// A pass converting tagged kernel modules to cubin blobs. -/// -/// If tagged as a kernel module, each contained function is translated to NVVM -/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to -/// GPU binary code, which is then attached as an attribute to the function. The -/// function body is erased. +/// A pass converting tagged kernel functions to cubin blobs. class GpuKernelToCubinPass : public ModulePass<GpuKernelToCubinPass> { public: GpuKernelToCubinPass( CubinGenerator cubinGenerator = compilePtxToCubinForTesting) : cubinGenerator(cubinGenerator) {} + // Run the dialect converter on the module. void runOnModule() override { - if (!getModule().getAttrOfType<UnitAttr>( - gpu::GPUDialect::getKernelModuleAttrName())) - return; - // Make sure the NVPTX target is initialized. LLVMInitializeNVPTXTarget(); LLVMInitializeNVPTXTargetInfo(); LLVMInitializeNVPTXTargetMC(); LLVMInitializeNVPTXAsmPrinter(); - auto llvmModule = translateModuleToNVVMIR(getModule()); - if (!llvmModule) - return signalPassFailure(); - for (auto function : getModule().getOps<FuncOp>()) { - if (!gpu::GPUDialect::isKernel(function)) + if (!gpu::GPUDialect::isKernel(function) || function.isExternal()) { continue; - if (failed(translateGpuKernelToCubinAnnotation(*llvmModule, function))) + } + if (failed(translateGpuKernelToCubinAnnotation(function))) signalPassFailure(); } } @@ -90,13 +79,8 @@ private: std::string translateModuleToPtx(llvm::Module &module, llvm::TargetMachine &target_machine); - - /// Converts llvmModule to cubin using the user-provded generator. OwnedCubin convertModuleToCubin(llvm::Module &llvmModule, FuncOp &function); - - /// Translates llvmModule to cubin and assigns it to attribute of function. - LogicalResult translateGpuKernelToCubinAnnotation(llvm::Module &llvmModule, - FuncOp &function); + LogicalResult translateGpuKernelToCubinAnnotation(FuncOp &function); CubinGenerator cubinGenerator; }; @@ -151,13 +135,22 @@ OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule, return cubinGenerator(ptx, function); } -LogicalResult GpuKernelToCubinPass::translateGpuKernelToCubinAnnotation( - llvm::Module &llvmModule, FuncOp &function) { - auto cubin = convertModuleToCubin(llvmModule, function); - if (!cubin) +LogicalResult +GpuKernelToCubinPass::translateGpuKernelToCubinAnnotation(FuncOp &function) { + Builder builder(function.getContext()); + + OwningModuleRef module = ModuleOp::create(function.getLoc()); + + // TODO(herhut): Also handle called functions. + module->push_back(function.clone()); + + auto llvmModule = translateModuleToNVVMIR(*module); + auto cubin = convertModuleToCubin(*llvmModule, function); + + if (!cubin) { return function.emitError("translation to CUDA binary failed."); + } - Builder builder(function.getContext()); function.setAttr(kCubinAnnotation, builder.getStringAttr({cubin->data(), cubin->size()})); diff --git a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp index 83c3538324b..f8c6f5d15ff 100644 --- a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp +++ b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp @@ -43,15 +43,8 @@ constexpr const char *kCubinGetterAnnotation = "nvvm.cubingetter"; constexpr const char *kCubinGetterSuffix = "_cubin"; constexpr const char *kCubinStorageSuffix = "_cubin_cst"; -/// A pass which moves cubin from function attributes in nested modules -/// to global strings and generates getter functions. -/// -/// The GpuKernelToCubinPass annotates kernels functions with compiled device -/// code blobs. These functions reside in nested modules generated by -/// GpuKernelOutliningPass. This pass consumes these modules and moves the cubin -/// blobs back to the parent module as global strings and generates accessor -/// functions for them. The external kernel functions (also generated by the -/// outlining pass) are annotated with the symbol of the cubin accessor. +/// A pass generating global strings and getter functions for all cubin blobs +/// annotated on functions via the nvvm.cubin attribute. class GpuGenerateCubinAccessorsPass : public ModulePass<GpuGenerateCubinAccessorsPass> { private: @@ -62,25 +55,18 @@ private: } // Inserts a global constant string containing `blob` into the parent module - // of `kernelFunc` and generates the function that returns the address of the - // first character of this string. + // of `orig` and generates the function that returns the address of the first + // character of this string. // TODO(herhut): consider fusing this pass with launch-func-to-cuda. - void generate(FuncOp kernelFunc, StringAttr blob) { - auto stubFunc = getModule().lookupSymbol<FuncOp>(kernelFunc.getName()); - if (!stubFunc) { - kernelFunc.emitError( - "corresponding external function not found in parent module"); - return signalPassFailure(); - } - - Location loc = stubFunc.getLoc(); - SmallString<128> nameBuffer(stubFunc.getName()); - auto module = stubFunc.getParentOfType<ModuleOp>(); + void generate(FuncOp orig, StringAttr blob) { + Location loc = orig.getLoc(); + SmallString<128> nameBuffer(orig.getName()); + auto module = orig.getParentOfType<ModuleOp>(); assert(module && "function must belong to a module"); // Insert the getter function just after the original function. OpBuilder moduleBuilder(module.getBody(), module.getBody()->begin()); - moduleBuilder.setInsertionPoint(stubFunc.getOperation()->getNextNode()); + moduleBuilder.setInsertionPoint(orig.getOperation()->getNextNode()); auto getterType = moduleBuilder.getFunctionType( llvm::None, LLVM::LLVMType::getInt8PtrTy(llvmDialect)); nameBuffer.append(kCubinGetterSuffix); @@ -89,7 +75,7 @@ private: Block *entryBlock = result.addEntryBlock(); // Drop the getter suffix before appending the storage suffix. - nameBuffer.resize(stubFunc.getName().size()); + nameBuffer.resize(orig.getName().size()); nameBuffer.append(kCubinStorageSuffix); // Obtain the address of the first character of the global string containing @@ -100,23 +86,21 @@ private: builder.create<LLVM::ReturnOp>(loc, startPtr); // Store the name of the getter on the function for easier lookup. - stubFunc.setAttr(kCubinGetterAnnotation, builder.getSymbolRefAttr(result)); + orig.setAttr(kCubinGetterAnnotation, builder.getSymbolRefAttr(result)); } public: + // Perform the conversion on the module. This may insert globals, so it + // cannot be done on multiple functions in parallel. void runOnModule() override { - llvmDialect = getContext().getRegisteredDialect<LLVM::LLVMDialect>(); + llvmDialect = + getModule().getContext()->getRegisteredDialect<LLVM::LLVMDialect>(); - auto modules = getModule().getOps<ModuleOp>(); - for (auto module : llvm::make_early_inc_range(modules)) { - if (!module.getAttrOfType<UnitAttr>( - gpu::GPUDialect::getKernelModuleAttrName())) + for (auto func : getModule().getOps<FuncOp>()) { + StringAttr cubinBlob = func.getAttrOfType<StringAttr>(kCubinAnnotation); + if (!cubinBlob) continue; - for (auto func : module.getOps<FuncOp>()) { - if (StringAttr blob = func.getAttrOfType<StringAttr>(kCubinAnnotation)) - generate(func, blob); - } - module.erase(); + generate(func, cubinBlob); } } |

