summaryrefslogtreecommitdiffstats
path: root/mlir/test/Conversion/GPUToCUDA
diff options
context:
space:
mode:
authorAlex Zinenko <zinenko@google.com>2019-10-08 04:29:58 -0700
committerA. Unique TensorFlower <gardener@tensorflow.org>2019-10-08 04:30:32 -0700
commit90d65d32d69ca46f52a9a744eafdad0d97b4a185 (patch)
tree405063572a964c854039bbb6d1554cae98b7bd0a /mlir/test/Conversion/GPUToCUDA
parent780f107a57113706a4551e32c32fcd60006d9263 (diff)
downloadbcm5719-llvm-90d65d32d69ca46f52a9a744eafdad0d97b4a185.tar.gz
bcm5719-llvm-90d65d32d69ca46f52a9a744eafdad0d97b4a185.zip
Use named modules for gpu.launch_func
The kernel function called by gpu.launch_func is now placed into an isolated nested module during the outlining stage to simplify separate compilation. Until recently, modules did not have names and could not be referenced. This limitation was circumvented by introducing a stub kernel with the same name, at the same nesting level as the module containing the actual kernel. This relation is only effective in one direction: from actual kernel function to its launch_func "caller". Leverage the recently introduced symbol name attributes on modules to refer to a specific nested module from `gpu.launch_func`. This removes the implicit connection between the identically named stub and kernel functions. It also enables support for `gpu.launch_func`s to call different kernels located in the same module. PiperOrigin-RevId: 273491891
Diffstat (limited to 'mlir/test/Conversion/GPUToCUDA')
-rw-r--r--mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir17
-rw-r--r--mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir42
2 files changed, 33 insertions, 26 deletions
diff --git a/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir b/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir
index 9e0907f7477..ef58433bdef 100644
--- a/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir
@@ -1,15 +1,15 @@
// RUN: mlir-opt %s --generate-cubin-accessors | FileCheck %s
-// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN")
+module attributes {gpu.container_module} {
-module attributes {gpu.kernel_module} {
- func @kernel(!llvm.float, !llvm<"float*">)
- attributes {nvvm.cubin = "CUBIN"}
-}
+// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN")
-func @kernel(!llvm.float, !llvm<"float*">)
-// CHECK: attributes {gpu.kernel, nvvm.cubingetter = @[[getter:.*]]}
- attributes {gpu.kernel}
+ module attributes {gpu.kernel_module} {
+ // CHECK-LABEL: func @kernel
+ func @kernel(!llvm.float, !llvm<"float*">)
+ // CHECK: attributes {nvvm.cubingetter = @[[getter:.*]]}
+ attributes {nvvm.cubin = "CUBIN"}
+ }
// CHECK: func @[[getter]]() -> !llvm<"i8*">
// CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]]
@@ -17,3 +17,4 @@ func @kernel(!llvm.float, !llvm<"float*">)
// CHECK: %[[gep:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]]
// CHECK-SAME: -> !llvm<"i8*">
// CHECK: llvm.return %[[gep]] : !llvm<"i8*">
+}
diff --git a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
index bc843e3595b..a4ff3c97cbf 100644
--- a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
@@ -1,27 +1,33 @@
// RUN: mlir-opt %s --launch-func-to-cuda | FileCheck %s
-// CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00")
+module attributes {gpu.container_module} {
-func @cubin_getter() -> !llvm<"i8*">
+ // CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00")
-func @kernel(!llvm.float, !llvm<"float*">)
- attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter }
+ func @cubin_getter() -> !llvm<"i8*">
+ module @kernel_module attributes {gpu.kernel_module} {
+ func @kernel(!llvm.float, !llvm<"float*">)
+ attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter }
+ }
-func @foo() {
- %0 = "op"() : () -> (!llvm.float)
- %1 = "op"() : () -> (!llvm<"float*">)
- %cst = constant 8 : index
- // CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
- // CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32
- // CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
- // CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32
- // CHECK: llvm.call @mcuGetStreamHelper
- // CHECK: llvm.call @mcuLaunchKernel
- // CHECK: llvm.call @mcuStreamSynchronize
- "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel }
- : (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> ()
+ func @foo() {
+ %0 = "op"() : () -> (!llvm.float)
+ %1 = "op"() : () -> (!llvm<"float*">)
+ %cst = constant 8 : index
+
+ // CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
+ // CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32
+ // CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
+ // CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32
+ // CHECK: llvm.call @mcuGetStreamHelper
+ // CHECK: llvm.call @mcuLaunchKernel
+ // CHECK: llvm.call @mcuStreamSynchronize
+ "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel", kernel_module = @kernel_module }
+ : (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> ()
+
+ return
+ }
- return
}
OpenPOWER on IntegriCloud