diff options
| author | Alex Zinenko <zinenko@google.com> | 2019-10-08 04:29:58 -0700 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-10-08 04:30:32 -0700 |
| commit | 90d65d32d69ca46f52a9a744eafdad0d97b4a185 (patch) | |
| tree | 405063572a964c854039bbb6d1554cae98b7bd0a /mlir/test/Conversion/GPUToCUDA | |
| parent | 780f107a57113706a4551e32c32fcd60006d9263 (diff) | |
| download | bcm5719-llvm-90d65d32d69ca46f52a9a744eafdad0d97b4a185.tar.gz bcm5719-llvm-90d65d32d69ca46f52a9a744eafdad0d97b4a185.zip | |
Use named modules for gpu.launch_func
The kernel function called by gpu.launch_func is now placed into an isolated
nested module during the outlining stage to simplify separate compilation.
Until recently, modules did not have names and could not be referenced. This
limitation was circumvented by introducing a stub kernel with the same name at
the same nesting level as the module containing the actual kernel. This
relation is only effective in one direction: from actual kernel function to its
launch_func "caller".
Leverage the recently introduced symbol name attributes on modules to refer to
a specific nested module from `gpu.launch_func`. This removes the implicit
connection between the identically named stub and kernel functions. It also
enables support for `gpu.launch_func`s to call different kernels located in the
same module.
PiperOrigin-RevId: 273491891
Diffstat (limited to 'mlir/test/Conversion/GPUToCUDA')
| -rw-r--r-- | mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir | 17 | ||||
| -rw-r--r-- | mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir | 42 |
2 files changed, 33 insertions, 26 deletions
diff --git a/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir b/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir index 9e0907f7477..ef58433bdef 100644 --- a/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir +++ b/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir @@ -1,15 +1,15 @@ // RUN: mlir-opt %s --generate-cubin-accessors | FileCheck %s -// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN") +module attributes {gpu.container_module} { -module attributes {gpu.kernel_module} { - func @kernel(!llvm.float, !llvm<"float*">) - attributes {nvvm.cubin = "CUBIN"} -} +// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN") -func @kernel(!llvm.float, !llvm<"float*">) -// CHECK: attributes {gpu.kernel, nvvm.cubingetter = @[[getter:.*]]} - attributes {gpu.kernel} + module attributes {gpu.kernel_module} { + // CHECK-LABEL: func @kernel + func @kernel(!llvm.float, !llvm<"float*">) + // CHECK: attributes {nvvm.cubingetter = @[[getter:.*]]} + attributes {nvvm.cubin = "CUBIN"} + } // CHECK: func @[[getter]]() -> !llvm<"i8*"> // CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]] @@ -17,3 +17,4 @@ func @kernel(!llvm.float, !llvm<"float*">) // CHECK: %[[gep:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]] // CHECK-SAME: -> !llvm<"i8*"> // CHECK: llvm.return %[[gep]] : !llvm<"i8*"> +} diff --git a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir index bc843e3595b..a4ff3c97cbf 100644 --- a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir +++ b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir @@ -1,27 +1,33 @@ // RUN: mlir-opt %s --launch-func-to-cuda | FileCheck %s -// CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00") +module attributes {gpu.container_module} { -func @cubin_getter() -> !llvm<"i8*"> + // CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00") -func @kernel(!llvm.float, !llvm<"float*">) - 
attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter } + func @cubin_getter() -> !llvm<"i8*"> + module @kernel_module attributes {gpu.kernel_module} { + func @kernel(!llvm.float, !llvm<"float*">) + attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter } + } -func @foo() { - %0 = "op"() : () -> (!llvm.float) - %1 = "op"() : () -> (!llvm<"float*">) - %cst = constant 8 : index - // CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> - // CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32 - // CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> - // CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32 - // CHECK: llvm.call @mcuGetStreamHelper - // CHECK: llvm.call @mcuLaunchKernel - // CHECK: llvm.call @mcuStreamSynchronize - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel } - : (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> () + func @foo() { + %0 = "op"() : () -> (!llvm.float) + %1 = "op"() : () -> (!llvm<"float*">) + %cst = constant 8 : index + + // CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> + // CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32 + // CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> + // CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32 + // CHECK: llvm.call @mcuGetStreamHelper + // CHECK: llvm.call @mcuLaunchKernel + // CHECK: llvm.call @mcuStreamSynchronize + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel", kernel_module = @kernel_module } + : (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> () + + 
return + } - return } |

