diff options
| author | Christian Sigg <csigg@google.com> | 2019-09-23 03:16:23 -0700 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-09-23 03:17:01 -0700 |
| commit | b8676da1fc5e7d371a76defbd1b4a93a41fe2e33 (patch) | |
| tree | 2ebd739e95dfea1c52397e752debbb5d13952a52 /mlir/lib/Dialect/GPU/Transforms | |
| parent | c900d4994e609ac0f4730d0cdd918eae958e9d28 (diff) | |
| download | bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.tar.gz bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.zip | |
Outline GPU kernel function into a nested module.
Roll forward of commit 5684a12.
When outlining GPU kernels, put the kernel function inside a nested module. Then use a nested pipeline to generate the cubins, independently per kernel. In a final pass, move the cubins back to the parent module.
PiperOrigin-RevId: 270639748
Diffstat (limited to 'mlir/lib/Dialect/GPU/Transforms')
| -rw-r--r-- | mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 37 |
1 file changed, 33 insertions, 4 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index 4328fb39c29..9bf4cf6e643 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -93,7 +93,7 @@ static gpu::LaunchFuncOp inlineConstants(FuncOp kernelFunc, } // Outline the `gpu.launch` operation body into a kernel function. Replace -// `gpu.return` operations by `std.return` in the generated functions. +// `gpu.return` operations by `std.return` in the generated function. static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) { Location loc = launchOp.getLoc(); SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes()); @@ -107,7 +107,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) { outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(), builder.getUnitAttr()); injectGpuIndexOperations(loc, outlinedFunc); - outlinedFunc.walk([](mlir::gpu::Return op) { + outlinedFunc.walk([](gpu::Return op) { OpBuilder replacer(op); replacer.create<ReturnOp>(op.getLoc()); op.erase(); @@ -131,15 +131,44 @@ static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) { namespace { +/// Pass that moves the kernel of each LaunchOp into its separate nested module. +/// +/// This pass moves the kernel code of each LaunchOp into a function created +/// inside a nested module. It also creates an external function of the same +/// name in the parent module. +/// +/// The kernel modules are intended to be compiled to a cubin blob independently +/// in a separate pass. The external functions can then be annotated with the +/// symbol of the cubin accessor function. 
class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> { public: void runOnModule() override { ModuleManager moduleManager(getModule()); + auto context = getModule().getContext(); + Builder builder(context); for (auto func : getModule().getOps<FuncOp>()) { - func.walk([&](mlir::gpu::LaunchOp op) { + // Insert just after the function. + Block::iterator insertPt(func.getOperation()->getNextNode()); + func.walk([&](gpu::LaunchOp op) { + // TODO(b/141098412): Handle called functions and globals. FuncOp outlinedFunc = outlineKernelFunc(op); - moduleManager.insert(outlinedFunc); + + // Potentially renames outlinedFunc to make symbol unique. + moduleManager.insert(insertPt, outlinedFunc); + + // Potentially changes signature, pulling in constants. convertToLaunchFuncOp(op, outlinedFunc); + + // Create clone and move body from outlinedFunc. + auto kernelFunc = outlinedFunc.cloneWithoutRegions(); + kernelFunc.getBody().takeBody(outlinedFunc.getBody()); + + // Create nested module and insert kernelFunc. + auto kernelModule = ModuleOp::create(UnknownLoc::get(context)); + kernelModule.setAttr(gpu::GPUDialect::getKernelModuleAttrName(), + builder.getUnitAttr()); + kernelModule.push_back(kernelFunc); + getModule().insert(insertPt, kernelModule); }); } } |

