diff options
4 files changed, 106 insertions, 2 deletions
Add GPU lowerings for std.absf, std.ceilf and std.cos.

Both GPU conversion passes gain OpToFuncCallLowering patterns that rewrite
AbsFOp, CeilFOp and CosOp into calls to externally named functions, next to
the existing ExpOp pattern:
  * GPU-to-NVVM:  __nv_fabsf/__nv_fabs, __nv_ceilf/__nv_ceil,
                  __nv_cosf/__nv_cos
  * GPU-to-ROCDL: _ocml_fabs_{f32,f64}, _ocml_ceil_{f32,f64},
                  _ocml_cos_{f32,f64}
LLVM::FAbsOp, LLVM::FCeilOp and LLVM::CosOp are added to each pass's illegal-op
list alongside LLVM::ExpOp. One FileCheck test module per new op is added for
each target, covering the f32 and f64 variants.

NOTE(review): the ROCDL names use a single leading underscore, matching the
existing _ocml_exp_* entries in this file; ROCm's OCML documentation names its
functions __ocml_* -- TODO confirm which spelling is intended.

diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index c2493f773d1..00f89d3644f 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -712,7 +712,8 @@ public:
     populateGpuToNVVMConversionPatterns(converter, patterns);
     ConversionTarget target(getContext());
     target.addIllegalDialect<gpu::GPUDialect>();
-    target.addIllegalOp<LLVM::ExpOp>();
+    target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
+                        LLVM::ExpOp>();
     target.addIllegalOp<FuncOp>();
     target.addLegalDialect<LLVM::LLVMDialect>();
     target.addLegalDialect<NVVM::NVVMDialect>();
@@ -739,6 +740,12 @@ void mlir::populateGpuToNVVMConversionPatterns(
                                   NVVM::GridDimYOp, NVVM::GridDimZOp>,
       GPUAllReduceOpLowering, GPUShuffleOpLowering, GPUFuncOpLowering,
       GPUReturnOpLowering>(converter);
+  patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "__nv_fabsf",
+                                                "__nv_fabs");
+  patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "__nv_ceilf",
+                                                 "__nv_ceil");
+  patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "__nv_cosf",
+                                               "__nv_cos");
   patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
                                                "__nv_exp");
 }
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 83770641bd4..119479d7ec1 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -51,12 +51,19 @@ public:
         GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
                                     ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
         converter);
+    patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "_ocml_fabs_f32",
+                                                  "_ocml_fabs_f64");
+    patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "_ocml_ceil_f32",
+                                                   "_ocml_ceil_f64");
+    patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "_ocml_cos_f32",
+                                                 "_ocml_cos_f64");
     patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "_ocml_exp_f32",
                                                  "_ocml_exp_f64");
 
     ConversionTarget target(getContext());
     target.addLegalDialect<LLVM::LLVMDialect, ROCDL::ROCDLDialect>();
-    target.addIllegalOp<LLVM::ExpOp>();
+    target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
+                        LLVM::ExpOp>();
     target.addDynamicallyLegalOp<FuncOp>(
         [&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
     if (failed(applyPartialConversion(m, target, patterns, &converter)))
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index b1820cb778f..24bf56557c3 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -112,6 +112,51 @@ module attributes {gpu.kernel_module} {
 // -----
 
 module attributes {gpu.kernel_module} {
+  // CHECK: llvm.func @__nv_fabsf(!llvm.float) -> !llvm.float
+  // CHECK: llvm.func @__nv_fabs(!llvm.double) -> !llvm.double
+  // CHECK-LABEL: func @gpu_fabs
+  func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
+    %result32 = std.absf %arg_f32 : f32
+    // CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (!llvm.float) -> !llvm.float
+    %result64 = std.absf %arg_f64 : f64
+    // CHECK: llvm.call @__nv_fabs(%{{.*}}) : (!llvm.double) -> !llvm.double
+    std.return
+  }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+  // CHECK: llvm.func @__nv_ceilf(!llvm.float) -> !llvm.float
+  // CHECK: llvm.func @__nv_ceil(!llvm.double) -> !llvm.double
+  // CHECK-LABEL: func @gpu_ceil
+  func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
+    %result32 = std.ceilf %arg_f32 : f32
+    // CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (!llvm.float) -> !llvm.float
+    %result64 = std.ceilf %arg_f64 : f64
+    // CHECK: llvm.call @__nv_ceil(%{{.*}}) : (!llvm.double) -> !llvm.double
+    std.return
+  }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+  // CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float
+  // CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double
+  // CHECK-LABEL: func @gpu_cos
+  func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
+    %result32 = std.cos %arg_f32 : f32
+    // CHECK: llvm.call @__nv_cosf(%{{.*}}) : (!llvm.float) -> !llvm.float
+    %result64 = std.cos %arg_f64 : f64
+    // CHECK: llvm.call @__nv_cos(%{{.*}}) : (!llvm.double) -> !llvm.double
+    std.return
+  }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
   // CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
   // CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double
   // CHECK-LABEL: func @gpu_exp
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index f803266ad28..372fe2784c3 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -39,6 +39,51 @@ module attributes {gpu.kernel_module} {
 // -----
 
 module attributes {gpu.kernel_module} {
+  // CHECK: llvm.func @_ocml_fabs_f32(!llvm.float) -> !llvm.float
+  // CHECK: llvm.func @_ocml_fabs_f64(!llvm.double) -> !llvm.double
+  // CHECK-LABEL: func @gpu_fabs
+  func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
+    %result32 = std.absf %arg_f32 : f32
+    // CHECK: llvm.call @_ocml_fabs_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
+    %result64 = std.absf %arg_f64 : f64
+    // CHECK: llvm.call @_ocml_fabs_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
+    std.return
+  }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+  // CHECK: llvm.func @_ocml_ceil_f32(!llvm.float) -> !llvm.float
+  // CHECK: llvm.func @_ocml_ceil_f64(!llvm.double) -> !llvm.double
+  // CHECK-LABEL: func @gpu_ceil
+  func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
+    %result32 = std.ceilf %arg_f32 : f32
+    // CHECK: llvm.call @_ocml_ceil_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
+    %result64 = std.ceilf %arg_f64 : f64
+    // CHECK: llvm.call @_ocml_ceil_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
+    std.return
+  }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
+  // CHECK: llvm.func @_ocml_cos_f32(!llvm.float) -> !llvm.float
+  // CHECK: llvm.func @_ocml_cos_f64(!llvm.double) -> !llvm.double
+  // CHECK-LABEL: func @gpu_cos
+  func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
+    %result32 = std.cos %arg_f32 : f32
+    // CHECK: llvm.call @_ocml_cos_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
+    %result64 = std.cos %arg_f64 : f64
+    // CHECK: llvm.call @_ocml_cos_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
+    std.return
+  }
+}
+
+// -----
+
+module attributes {gpu.kernel_module} {
   // CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float
   // CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double
   // CHECK-LABEL: func @gpu_exp