summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp 9
-rw-r--r-- mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp 9
-rw-r--r-- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir 45
-rw-r--r-- mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir 45
4 files changed, 106 insertions, 2 deletions
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index c2493f773d1..00f89d3644f 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -712,7 +712,8 @@ public:
populateGpuToNVVMConversionPatterns(converter, patterns);
ConversionTarget target(getContext());
target.addIllegalDialect<gpu::GPUDialect>();
- target.addIllegalOp<LLVM::ExpOp>();
+ target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
+ LLVM::ExpOp>();
target.addIllegalOp<FuncOp>();
target.addLegalDialect<LLVM::LLVMDialect>();
target.addLegalDialect<NVVM::NVVMDialect>();
@@ -739,6 +740,12 @@ void mlir::populateGpuToNVVMConversionPatterns(
NVVM::GridDimYOp, NVVM::GridDimZOp>,
GPUAllReduceOpLowering, GPUShuffleOpLowering, GPUFuncOpLowering,
GPUReturnOpLowering>(converter);
+ patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "__nv_fabsf",
+ "__nv_fabs");
+ patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "__nv_ceilf",
+ "__nv_ceil");
+ patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "__nv_cosf",
+ "__nv_cos");
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
"__nv_exp");
}
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 83770641bd4..119479d7ec1 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -51,12 +51,19 @@ public:
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
converter);
+ patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "_ocml_fabs_f32",
+ "_ocml_fabs_f64");
+ patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "_ocml_ceil_f32",
+ "_ocml_ceil_f64");
+ patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "_ocml_cos_f32",
+ "_ocml_cos_f64");
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "_ocml_exp_f32",
"_ocml_exp_f64");
ConversionTarget target(getContext());
target.addLegalDialect<LLVM::LLVMDialect, ROCDL::ROCDLDialect>();
- target.addIllegalOp<LLVM::ExpOp>();
+ target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
+ LLVM::ExpOp>();
target.addDynamicallyLegalOp<FuncOp>(
[&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
if (failed(applyPartialConversion(m, target, patterns, &converter)))
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index b1820cb778f..24bf56557c3 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -112,6 +112,51 @@ module attributes {gpu.kernel_module} {
// -----
module attributes {gpu.kernel_module} {
+ // CHECK: llvm.func @__nv_fabsf(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @__nv_fabs(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_fabs
+ func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
+ %result32 = std.absf %arg_f32 : f32
+ // CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.absf %arg_f64 : f64
+ // CHECK: llvm.call @__nv_fabs(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return
+ }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+ // CHECK: llvm.func @__nv_ceilf(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @__nv_ceil(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_ceil
+ func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
+ %result32 = std.ceilf %arg_f32 : f32
+ // CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.ceilf %arg_f64 : f64
+ // CHECK: llvm.call @__nv_ceil(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return
+ }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+ // CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_cos
+ func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
+ %result32 = std.cos %arg_f32 : f32
+ // CHECK: llvm.call @__nv_cosf(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.cos %arg_f64 : f64
+ // CHECK: llvm.call @__nv_cos(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return
+ }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_exp
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index f803266ad28..372fe2784c3 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -39,6 +39,51 @@ module attributes {gpu.kernel_module} {
// -----
module attributes {gpu.kernel_module} {
+ // CHECK: llvm.func @_ocml_fabs_f32(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @_ocml_fabs_f64(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_fabs
+ func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
+ %result32 = std.absf %arg_f32 : f32
+ // CHECK: llvm.call @_ocml_fabs_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.absf %arg_f64 : f64
+ // CHECK: llvm.call @_ocml_fabs_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return
+ }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+ // CHECK: llvm.func @_ocml_ceil_f32(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @_ocml_ceil_f64(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_ceil
+ func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
+ %result32 = std.ceilf %arg_f32 : f32
+ // CHECK: llvm.call @_ocml_ceil_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.ceilf %arg_f64 : f64
+ // CHECK: llvm.call @_ocml_ceil_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return
+ }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+ // CHECK: llvm.func @_ocml_cos_f32(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @_ocml_cos_f64(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_cos
+ func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
+ %result32 = std.cos %arg_f32 : f32
+ // CHECK: llvm.call @_ocml_cos_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.cos %arg_f64 : f64
+ // CHECK: llvm.call @_ocml_cos_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return
+ }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
// CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float
// CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_exp
OpenPOWER on IntegriCloud