[ROCm] Adding ROCDL Dialect.

This commit introduces the ROCDL Dialect (i.e. the ROCDL ops + the code to lower those ROCDL ops to LLVM intrinsics/functions). Think of ROCDL Dialect as analogous to the NVVM Dialect, but for AMD GPUs. This patch contains just the essentials needed to get a simple example up and running. We expect to make further additions to the ROCDL Dialect. This is the first of 3 commits, the follow-up will be: * add a pass that lowers GPU Dialect to ROCDL Dialect * add a "mlir-rocm-runner" utility Closes tensorflow/mlir#146 COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/146 from deven-amd:deven-rocdl-dialect e78e8005c75a78912631116c78dc844fcc4b0de9 PiperOrigin-RevId: 271511259
author: Deven Desai <36858332+deven-amd@users.noreply.github.com> 2019-09-26 23:49:51 -0700
committer: A. Unique TensorFlower <gardener@tensorflow.org> 2019-09-27 00:22:32 -0700
commit: fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5 (patch)
tree: e87354a12e4a24d998d8d7592e5bdeb06880c573 /mlir/lib/Target
parent: 7385d8789560a392971c60426c7d17569551bd32 (diff)
download: bcm5719-llvm-fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5.tar.gz
bcm5719-llvm-fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5.zip
2 files changed, 133 insertions, 0 deletions
diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt
index 9f49b813336..111e2f67313 100644
--- a/mlir/lib/Target/CMakeLists.txt
+++ b/mlir/lib/Target/CMakeLists.txt
@@ -28,3 +28,17 @@ target_link_libraries(MLIRTargetNVVMIR
   MLIRNVVMIR
   MLIRTargetLLVMIRModuleTranslation
   )
+add_llvm_library(MLIRTargetROCDLIR
+  LLVMIR/ConvertToROCDLIR.cpp  # MLIR LLVM + ROCDL dialects -> LLVM IR translation.
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
+  DEPENDS
+  intrinsics_gen
+  )
+target_link_libraries(MLIRTargetROCDLIR
+  MLIRGPU
+  MLIRIR
+  MLIRROCDLIR
+  MLIRTargetLLVMIRModuleTranslation
+  )
diff --git a/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp
new file mode 100644
index 00000000000..2c2d1169a3d
--- /dev/null
+++ b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp
@@ -0,0 +1,119 @@
+//===- ConvertToROCDLIR.cpp - MLIR to LLVM IR conversion ------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements a translation between the MLIR LLVM + ROCDL dialects and
+// LLVM IR with ROCDL intrinsics and metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/ROCDLIR.h"
+
+#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Module.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "mlir/Translation.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+#include <iostream>
+
+using namespace mlir;
+
+namespace {
+// Emit a call to the LLVM intrinsic `intrinsic`, passing `args` as operands.
+static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder,
+                                        llvm::Intrinsic::ID intrinsic,
+                                        ArrayRef<llvm::Value *> args = {}) {
+  llvm::Function *callee = llvm::Intrinsic::getDeclaration(
+      builder.GetInsertBlock()->getModule(), intrinsic);
+  return builder.CreateCall(callee, args);
+}
+
+// Create a call to a ROCm-Device-Library function named `fn_name`.
+//   Currently this routine only works for ROCDL functions that take a single
+// int32 argument and return an int32; `parameter` is passed as a constant.
+// It is likely that the interface will change to make it more generic.
+static llvm::Value *createDeviceFunctionCall(llvm::IRBuilder<> &builder,
+                                             StringRef fn_name, int parameter) {
+  llvm::Module *module = builder.GetInsertBlock()->getModule();
+  llvm::Type *int32_type = llvm::Type::getInt32Ty(module->getContext());
+  llvm::FunctionType *function_type =
+      llvm::FunctionType::get(int32_type, int32_type, /*isVarArg=*/false);
+  // Call through the FunctionCallee instead of dyn_cast'ing the callee to
+  // llvm::Function: that cast yields null, and CreateCall would then be handed
+  // a null callee, if `fn_name` already names a symbol of a different type.
+  llvm::FunctionCallee fn = module->getOrInsertFunction(fn_name, function_type);
+  llvm::Value *fn_op0 = llvm::ConstantInt::get(int32_type, parameter);
+  return builder.CreateCall(fn, llvm::ArrayRef<llvm::Value *>(fn_op0));
+}
+
+class ModuleTranslation : public LLVM::ModuleTranslation {
+  // LLVM::ModuleTranslation subclass used for the ROCDL translation.
+public:
+  explicit ModuleTranslation(ModuleOp module)
+      : LLVM::ModuleTranslation(module) {}
+  ~ModuleTranslation() override {}
+
+protected:
+  LogicalResult convertOperation(Operation &opInst,
+                                 llvm::IRBuilder<> &builder) override {
+    // Textual include; presumably the generated ROCDL op conversions (confirm).
+#include "mlir/Dialect/LLVMIR/ROCDLConversions.inc"
+    // Delegate to the base class conversion.
+    return LLVM::ModuleTranslation::convertOperation(opInst, builder);
+  }
+};
+} // namespace
+
+// Translate `m` to LLVM IR for ROCDL; returns null if no module is produced.
+std::unique_ptr<llvm::Module> mlir::translateModuleToROCDLIR(ModuleOp m) {
+  // Lower MLIR (with ROCDL Dialect) to LLVM IR (with ROCDL intrinsics).
+  auto llvmModule =
+      LLVM::ModuleTranslation::translateModule<ModuleTranslation>(m);
+  // Translation failed: propagate null rather than dereferencing it below.
+  if (!llvmModule)
+    return nullptr;
+
+  // For each GPU kernel:
+  // 1. Set the AMDGPU_KERNEL calling convention.
+  // 2. Add the "amdgpu-flat-work-group-size" = "1, 1024" function attribute.
+  for (FuncOp func : m.getOps<FuncOp>()) {
+    if (!func.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelFuncAttrName()))
+      continue;
+
+    auto *llvmFunc = llvmModule->getFunction(func.getName());
+    llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+    llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
+  }
+
+  return llvmModule;
+}
+
+// Registers "mlir-to-rocdlir": translates the module and prints the LLVM IR.
+static TranslateFromMLIRRegistration registration(
+    "mlir-to-rocdlir", [](ModuleOp module, llvm::raw_ostream &output) {
+      auto rocdlModule = mlir::translateModuleToROCDLIR(module);
+      if (rocdlModule) {
+        rocdlModule->print(output, nullptr);
+        return success();
+      }
+      return failure();
+    });
author	Deven Desai <36858332+deven-amd@users.noreply.github.com>	2019-09-26 23:49:51 -0700
committer	A. Unique TensorFlower <gardener@tensorflow.org>	2019-09-27 00:22:32 -0700
commit	fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5 (patch)
tree	e87354a12e4a24d998d8d7592e5bdeb06880c573 /mlir/lib/Target
parent	7385d8789560a392971c60426c7d17569551bd32 (diff)
download	bcm5719-llvm-fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5.tar.gz bcm5719-llvm-fee40fef5c37fee2b398d4d6ec28958bf5c0c0f5.zip