diff options
| author | Deven Desai <36858332+deven-amd@users.noreply.github.com> | 2019-09-26 23:49:51 -0700 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-09-27 00:22:32 -0700 |
| commit | fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5 (patch) | |
| tree | e87354a12e4a24d998d8d7592e5bdeb06880c573 /mlir/lib/Target | |
| parent | 7385d8789560a392971c60426c7d17569551bd32 (diff) | |
| download | bcm5719-llvm-fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5.tar.gz bcm5719-llvm-fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5.zip | |
[ROCm] Adding ROCDL Dialect.
This commit introduces the ROCDL Dialect (i.e. the ROCDL ops + the code to lower those ROCDL ops to LLVM intrinsics/functions). Think of ROCDL Dialect as analogous to the NVVM Dialect, but for AMD GPUs. This patch contains just the essentials needed to get a simple example up and running. We expect to make further additions to the ROCDL Dialect.
This is the first of 3 commits; the follow-ups will be:
* add a pass that lowers GPU Dialect to ROCDL Dialect
* add a "mlir-rocm-runner" utility
Closes tensorflow/mlir#146
COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/146 from deven-amd:deven-rocdl-dialect e78e8005c75a78912631116c78dc844fcc4b0de9
PiperOrigin-RevId: 271511259
Diffstat (limited to 'mlir/lib/Target')
| -rw-r--r-- | mlir/lib/Target/CMakeLists.txt | 14 | ||||
| -rw-r--r-- | mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp | 119 |
2 files changed, 133 insertions, 0 deletions
diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt index 9f49b813336..111e2f67313 100644 --- a/mlir/lib/Target/CMakeLists.txt +++ b/mlir/lib/Target/CMakeLists.txt @@ -28,3 +28,17 @@ target_link_libraries(MLIRTargetNVVMIR MLIRNVVMIR MLIRTargetLLVMIRModuleTranslation ) +add_llvm_library(MLIRTargetROCDLIR + LLVMIR/ConvertToROCDLIR.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR + DEPENDS + intrinsics_gen + ) +target_link_libraries(MLIRTargetROCDLIR + MLIRGPU + MLIRIR + MLIRROCDLIR + MLIRTargetLLVMIRModuleTranslation + ) diff --git a/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp new file mode 100644 index 00000000000..2c2d1169a3d --- /dev/null +++ b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp @@ -0,0 +1,119 @@ +//===- ConvertToROCDLIR.cpp - MLIR to LLVM IR conversion ------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements a translation between the MLIR LLVM + ROCDL dialects and +// LLVM IR with ROCDL intrinsics and metadata. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/ROCDLIR.h" + +#include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Module.h" +#include "mlir/Target/LLVMIR/ModuleTranslation.h" +#include "mlir/Translation.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ToolOutputFile.h" + +#include <iostream> + +using namespace mlir; + +namespace { +// Create a call to llvm intrisic +static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder, + llvm::Intrinsic::ID intrinsic, + ArrayRef<llvm::Value *> args = {}) { + llvm::Module *module = builder.GetInsertBlock()->getModule(); + llvm::Function *fn = llvm::Intrinsic::getDeclaration(module, intrinsic); + return builder.CreateCall(fn, args); +} + +// Create a call to ROCm-Device-Library function +// Currently this routine will work only for calling ROCDL functions that +// take a single int32 argument. It is likely that the interface of this +// function will change to make it more generic. +static llvm::Value *createDeviceFunctionCall(llvm::IRBuilder<> &builder, + StringRef fn_name, int parameter) { + llvm::Module *module = builder.GetInsertBlock()->getModule(); + llvm::FunctionType *function_type = llvm::FunctionType::get( + llvm::Type::getInt32Ty(module->getContext()), // return type. + llvm::Type::getInt32Ty(module->getContext()), // parameter type. + false); // no variadic arguments. 
+ llvm::Function *fn = llvm::dyn_cast<llvm::Function>( + module->getOrInsertFunction(fn_name, function_type).getCallee()); + llvm::Value *fn_op0 = llvm::ConstantInt::get( + llvm::Type::getInt32Ty(module->getContext()), parameter); + return builder.CreateCall(fn, llvm::ArrayRef<llvm::Value *>(fn_op0)); +} + +class ModuleTranslation : public LLVM::ModuleTranslation { + +public: + explicit ModuleTranslation(ModuleOp module) + : LLVM::ModuleTranslation(module) {} + ~ModuleTranslation() override {} + +protected: + LogicalResult convertOperation(Operation &opInst, + llvm::IRBuilder<> &builder) override { + +#include "mlir/Dialect/LLVMIR/ROCDLConversions.inc" + + return LLVM::ModuleTranslation::convertOperation(opInst, builder); + } +}; +} // namespace + +std::unique_ptr<llvm::Module> mlir::translateModuleToROCDLIR(ModuleOp m) { + ModuleTranslation translation(m); + + // lower MLIR (with RODL Dialect) to LLVM IR (with ROCDL intrinsics) + auto llvmModule = + LLVM::ModuleTranslation::translateModule<ModuleTranslation>(m); + + // foreach GPU kernel + // 1. Insert AMDGPU_KERNEL calling convention. + // 2. Insert amdgpu-flat-workgroup-size(1, 1024) attribute. + for (FuncOp func : m.getOps<FuncOp>()) { + if (!func.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelFuncAttrName())) + continue; + + auto *llvmFunc = llvmModule->getFunction(func.getName()); + + llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + + llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1, 1024"); + } + + return llvmModule; +} + +static TranslateFromMLIRRegistration + registration("mlir-to-rocdlir", + [](ModuleOp module, llvm::raw_ostream &output) { + auto llvmModule = mlir::translateModuleToROCDLIR(module); + if (!llvmModule) + return failure(); + + llvmModule->print(output, nullptr); + return success(); + }); |

