summaryrefslogtreecommitdiffstats
path: root/mlir/lib/Dialect/GPU/Transforms
diff options
context:
space:
mode:
author: Christian Sigg <csigg@google.com>, 2019-09-23 03:16:23 -0700
committer: A. Unique TensorFlower <gardener@tensorflow.org>, 2019-09-23 03:17:01 -0700
commit: b8676da1fc5e7d371a76defbd1b4a93a41fe2e33 (patch)
tree: 2ebd739e95dfea1c52397e752debbb5d13952a52 /mlir/lib/Dialect/GPU/Transforms
parent: c900d4994e609ac0f4730d0cdd918eae958e9d28 (diff)
download: bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.tar.gz
bcm5719-llvm-b8676da1fc5e7d371a76defbd1b4a93a41fe2e33.zip
Outline GPU kernel function into a nested module.
Roll forward of commit 5684a12. When outlining GPU kernels, put the kernel function inside a nested module. Then use a nested pipeline to generate the cubins, independently per kernel. In a final pass, move the cubins back to the parent module. PiperOrigin-RevId: 270639748
Diffstat (limited to 'mlir/lib/Dialect/GPU/Transforms')
-rw-r--r-- mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp 37
1 file changed, 33 insertions, 4 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 4328fb39c29..9bf4cf6e643 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -93,7 +93,7 @@ static gpu::LaunchFuncOp inlineConstants(FuncOp kernelFunc,
}
// Outline the `gpu.launch` operation body into a kernel function. Replace
-// `gpu.return` operations by `std.return` in the generated functions.
+// `gpu.return` operations by `std.return` in the generated function.
static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
Location loc = launchOp.getLoc();
SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes());
@@ -107,7 +107,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
builder.getUnitAttr());
injectGpuIndexOperations(loc, outlinedFunc);
- outlinedFunc.walk([](mlir::gpu::Return op) {
+ outlinedFunc.walk([](gpu::Return op) {
OpBuilder replacer(op);
replacer.create<ReturnOp>(op.getLoc());
op.erase();
@@ -131,15 +131,44 @@ static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
namespace {
+/// Pass that moves the kernel of each LaunchOp into its separate nested module.
+///
+/// This pass moves the kernel code of each LaunchOp into a function created
+/// inside a nested module. It also creates an external function of the same
+/// name in the parent module.
+///
+/// The kernel modules are intended to be compiled to a cubin blob independently
+/// in a separate pass. The external functions can then be annotated with the
+/// symbol of the cubin accessor function.
class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> {
public:
void runOnModule() override {
ModuleManager moduleManager(getModule());
+ auto context = getModule().getContext();
+ Builder builder(context);
for (auto func : getModule().getOps<FuncOp>()) {
- func.walk([&](mlir::gpu::LaunchOp op) {
+ // Insert just after the function.
+ Block::iterator insertPt(func.getOperation()->getNextNode());
+ func.walk([&](gpu::LaunchOp op) {
+ // TODO(b/141098412): Handle called functions and globals.
FuncOp outlinedFunc = outlineKernelFunc(op);
- moduleManager.insert(outlinedFunc);
+
+ // Potentially renames outlinedFunc to make symbol unique.
+ moduleManager.insert(insertPt, outlinedFunc);
+
+ // Potentially changes signature, pulling in constants.
convertToLaunchFuncOp(op, outlinedFunc);
+
+ // Create clone and move body from outlinedFunc.
+ auto kernelFunc = outlinedFunc.cloneWithoutRegions();
+ kernelFunc.getBody().takeBody(outlinedFunc.getBody());
+
+ // Create nested module and insert kernelFunc.
+ auto kernelModule = ModuleOp::create(UnknownLoc::get(context));
+ kernelModule.setAttr(gpu::GPUDialect::getKernelModuleAttrName(),
+ builder.getUnitAttr());
+ kernelModule.push_back(kernelFunc);
+ getModule().insert(insertPt, kernelModule);
});
}
}
OpenPOWER on IntegriCloud