diff options
| author | Alex Zinenko <zinenko@google.com> | 2019-10-08 04:29:58 -0700 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-10-08 04:30:32 -0700 |
| commit | 90d65d32d69ca46f52a9a744eafdad0d97b4a185 (patch) | |
| tree | 405063572a964c854039bbb6d1554cae98b7bd0a /mlir/test/Conversion/GPUToCUDA | |
| parent | 780f107a57113706a4551e32c32fcd60006d9263 (diff) | |
| download | bcm5719-llvm-90d65d32d69ca46f52a9a744eafdad0d97b4a185.tar.gz bcm5719-llvm-90d65d32d69ca46f52a9a744eafdad0d97b4a185.zip | |
Use named modules for gpu.launch_func
The kernel function called by gpu.launch_func is now placed into an isolated
nested module during the outlining stage to simplify separate compilation.
Until recently, modules did not have names and could not be referenced. This
limitation was circumvented by introducing a stub kernel with the same name at
the same nesting level as the module containing the actual kernel. This
relation is only effective in one direction: from actual kernel function to its
launch_func "caller".
Leverage the recently introduced symbol name attributes on modules to refer to
a specific nested module from `gpu.launch_func`. This removes the implicit
connection between the identically named stub and kernel functions. It also
enables support for `gpu.launch_func`s to call different kernels located in the
same module.
PiperOrigin-RevId: 273491891
Diffstat (limited to 'mlir/test/Conversion/GPUToCUDA')
| -rw-r--r-- | mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir | 17 | ||||
| -rw-r--r-- | mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir | 42 |
2 files changed, 33 insertions, 26 deletions
diff --git a/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir b/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir index 9e0907f7477..ef58433bdef 100644 --- a/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir +++ b/mlir/test/Conversion/GPUToCUDA/insert-cubin-getter.mlir @@ -1,15 +1,15 @@ // RUN: mlir-opt %s --generate-cubin-accessors | FileCheck %s -// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN") +module attributes {gpu.container_module} { -module attributes {gpu.kernel_module} { - func @kernel(!llvm.float, !llvm<"float*">) - attributes {nvvm.cubin = "CUBIN"} -} +// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN") -func @kernel(!llvm.float, !llvm<"float*">) -// CHECK: attributes {gpu.kernel, nvvm.cubingetter = @[[getter:.*]]} - attributes {gpu.kernel} + module attributes {gpu.kernel_module} { + // CHECK-LABEL: func @kernel + func @kernel(!llvm.float, !llvm<"float*">) + // CHECK: attributes {nvvm.cubingetter = @[[getter:.*]]} + attributes {nvvm.cubin = "CUBIN"} + } // CHECK: func @[[getter]]() -> !llvm<"i8*"> // CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]] @@ -17,3 +17,4 @@ func @kernel(!llvm.float, !llvm<"float*">) // CHECK: %[[gep:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]] // CHECK-SAME: -> !llvm<"i8*"> // CHECK: llvm.return %[[gep]] : !llvm<"i8*"> +} diff --git a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir index bc843e3595b..a4ff3c97cbf 100644 --- a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir +++ b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir @@ -1,27 +1,33 @@ // RUN: mlir-opt %s --launch-func-to-cuda | FileCheck %s -// CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00") +module attributes {gpu.container_module} { -func @cubin_getter() -> !llvm<"i8*"> + // CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00") -func @kernel(!llvm.float, !llvm<"float*">) - 
attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter } + func @cubin_getter() -> !llvm<"i8*"> + module @kernel_module attributes {gpu.kernel_module} { + func @kernel(!llvm.float, !llvm<"float*">) + attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter } + } -func @foo() { - %0 = "op"() : () -> (!llvm.float) - %1 = "op"() : () -> (!llvm<"float*">) - %cst = constant 8 : index - // CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> - // CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32 - // CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> - // CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32 - // CHECK: llvm.call @mcuGetStreamHelper - // CHECK: llvm.call @mcuLaunchKernel - // CHECK: llvm.call @mcuStreamSynchronize - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel } - : (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> () + func @foo() { + %0 = "op"() : () -> (!llvm.float) + %1 = "op"() : () -> (!llvm<"float*">) + %cst = constant 8 : index + + // CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> + // CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32 + // CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> + // CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32 + // CHECK: llvm.call @mcuGetStreamHelper + // CHECK: llvm.call @mcuLaunchKernel + // CHECK: llvm.call @mcuStreamSynchronize + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel", kernel_module = @kernel_module } + : (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> () + + 
return + } - return } |

