Outline GPU kernel function into a nested module.

Roll forward of commit 5684a12.

When outlining GPU kernels, put the kernel function inside a nested module. Then use a nested pipeline to generate the cubins, independently per kernel. In a final pass, move the cubins back to the parent module.

PiperOrigin-RevId: 270639748
author: Christian Sigg <csigg@google.com> 2019-09-23 03:16:23 -0700
committer: A. Unique TensorFlower <gardener@tensorflow.org> 2019-09-23 03:17:01 -0700
commit: b8676da1fc5e7d371a76defbd1b4a93a41fe2e33 (patch)
tree: 2ebd739e95dfea1c52397e752debbb5d13952a52 /mlir/lib/Dialect/GPU/Transforms
parent: c900d4994e609ac0f4730d0cdd918eae958e9d28 (diff)
download: bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.tar.gz
bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.zip
1 files changed, 33 insertions, 4 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 4328fb39c29..9bf4cf6e643 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -93,7 +93,7 @@ static gpu::LaunchFuncOp inlineConstants(FuncOp kernelFunc,
 }
 
 // Outline the `gpu.launch` operation body into a kernel function. Replace
-// `gpu.return` operations by `std.return` in the generated functions.
+// `gpu.return` operations by `std.return` in the generated function.
 static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
   Location loc = launchOp.getLoc();
   SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes());
@@ -107,7 +107,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
   outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                        builder.getUnitAttr());
   injectGpuIndexOperations(loc, outlinedFunc);
-  outlinedFunc.walk([](mlir::gpu::Return op) {
+  outlinedFunc.walk([](gpu::Return op) {
     OpBuilder replacer(op);
     replacer.create<ReturnOp>(op.getLoc());
     op.erase();
@@ -131,15 +131,44 @@ static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
 
 namespace {
 
+/// Pass that moves the kernel of each LaunchOp into its own nested module.
+///
+/// This pass moves the kernel code of each LaunchOp into a function created
+/// inside a nested module. It also creates an external function of the same
+/// name in the parent module.
+///
+/// The kernel modules are intended to be compiled to a cubin blob independently
+/// in a separate pass. The external functions can then be annotated with the
+/// symbol of the cubin accessor function.
 class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> {
 public:
   void runOnModule() override {
     ModuleManager moduleManager(getModule());
+    auto context = getModule().getContext();
+    Builder builder(context);
     for (auto func : getModule().getOps<FuncOp>()) {
-      func.walk([&](mlir::gpu::LaunchOp op) {
+      // Insert just after the function.
+      Block::iterator insertPt(func.getOperation()->getNextNode());
+      func.walk([&](gpu::LaunchOp op) {
+        // TODO(b/141098412): Handle called functions and globals.
         FuncOp outlinedFunc = outlineKernelFunc(op);
-        moduleManager.insert(outlinedFunc);
+
+        // Potentially renames outlinedFunc to make symbol unique.
+        moduleManager.insert(insertPt, outlinedFunc);
+
+        // Potentially changes signature, pulling in constants.
         convertToLaunchFuncOp(op, outlinedFunc);
+
+        // Create clone and move body from outlinedFunc.
+        auto kernelFunc = outlinedFunc.cloneWithoutRegions();
+        kernelFunc.getBody().takeBody(outlinedFunc.getBody());
+
+        // Create nested module and insert kernelFunc.
+        auto kernelModule = ModuleOp::create(UnknownLoc::get(context));
+        kernelModule.setAttr(gpu::GPUDialect::getKernelModuleAttrName(),
+                             builder.getUnitAttr());
+        kernelModule.push_back(kernelFunc);
+        getModule().insert(insertPt, kernelModule);
       });
     }
   }
author	Christian Sigg <csigg@google.com>	2019-09-23 03:16:23 -0700
committer	A. Unique TensorFlower <gardener@tensorflow.org>	2019-09-23 03:17:01 -0700
commit	b8676da1fc5e7d371a76defbd1b4a93a41fe2e33 (patch)
tree	2ebd739e95dfea1c52397e752debbb5d13952a52 /mlir/lib/Dialect/GPU/Transforms
parent	c900d4994e609ac0f4730d0cdd918eae958e9d28 (diff)
download	bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.tar.gz bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.zip