summaryrefslogtreecommitdiffstats
path: root/mlir/lib/Dialect/GPU/Transforms
diff options
context:
space:
mode:
authorStephan Herhut <herhut@google.com>2019-09-04 06:15:40 -0700
committerA. Unique TensorFlower <gardener@tensorflow.org>2019-09-04 06:16:07 -0700
commitdfd06af562e93e767000e96c436846caea847c38 (patch)
treeab4cf40cb2440e87fff5447663718a7fcf43741b /mlir/lib/Dialect/GPU/Transforms
parent2f13df13b0bb481702fc83eb50c273deadb55f20 (diff)
downloadbcm5719-llvm-dfd06af562e93e767000e96c436846caea847c38.tar.gz
bcm5719-llvm-dfd06af562e93e767000e96c436846caea847c38.zip
Make GPU kernel outlining inline constants.
It is generally beneficial to pass fewer arguments to a kernel, so cloning constants into the kernel is beneficial. PiperOrigin-RevId: 267139084
Diffstat (limited to 'mlir/lib/Dialect/GPU/Transforms')
-rw-r--r--mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp42
1 files changed, 40 insertions, 2 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index dae8ae8ec55..f464b091fc8 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -56,6 +56,42 @@ static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
}
}
+// Clones each constant-defined kernel operand of `launch` into `kernelFunc` and
+// erases the matching block argument; returns the (possibly rebuilt) launch op.
+static gpu::LaunchFuncOp inlineConstants(FuncOp kernelFunc,
+ gpu::LaunchFuncOp launch) {
+ OpBuilder kernelBuilder(kernelFunc.getBody());
+ auto &firstBlock = kernelFunc.getBody().front();
+ llvm::SmallVector<Value *, 8> newLaunchArgs;
+ for (int i = launch.getNumKernelOperands() - 1; i >= 0; --i) { // back to front, so erasing argument i leaves lower indices valid
+ auto operandOp = launch.getKernelOperand(i)->getDefiningOp();
+ auto constant = dyn_cast_or_null<ConstantOp>(operandOp); // null when there is no defining op or it is not a ConstantOp
+ if (!constant) {
+ newLaunchArgs.push_back(launch.getKernelOperand(i)); // non-constant operand stays a launch argument
+ continue;
+ }
+ auto newConstant = kernelBuilder.clone(*operandOp);
+ firstBlock.getArgument(i)->replaceAllUsesWith(newConstant->getResult(0));
+ firstBlock.eraseArgument(i);
+ }
+ if (newLaunchArgs.size() != launch.getNumKernelOperands()) { // true only when at least one operand was inlined
+ std::reverse(newLaunchArgs.begin(), newLaunchArgs.end()); // collected back to front; restore operand order
+ OpBuilder LaunchBuilder(launch);
+ SmallVector<Type, 8> newArgumentTypes;
+ newArgumentTypes.reserve(firstBlock.getNumArguments());
+ for (auto value : firstBlock.getArguments()) {
+ newArgumentTypes.push_back(value->getType());
+ }
+ kernelFunc.setType(LaunchBuilder.getFunctionType(newArgumentTypes, {})); // function type rebuilt from the remaining block arguments
+ auto newLaunch = LaunchBuilder.create<gpu::LaunchFuncOp>(
+ launch.getLoc(), kernelFunc, launch.getGridSizeOperandValues(),
+ launch.getBlockSizeOperandValues(), newLaunchArgs);
+ launch.erase(); // the rebuilt launch op supersedes the original
+ return newLaunch;
+ }
+ return launch; // nothing was inlined; the original op is returned unchanged
+}
+
// Outline the `gpu.launch` operation body into a kernel function. Replace
// `gpu.return` operations by `std.return` in the generated functions.
static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
@@ -80,14 +116,16 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
}
// Replace `gpu.launch` operations with an `gpu.launch_func` operation launching
-// `kernelFunc`.
+// `kernelFunc`. The kernel func contains the body of the `gpu.launch` with
+// constant region arguments inlined.
static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
OpBuilder builder(launchOp);
SmallVector<Value *, 4> kernelOperandValues(
launchOp.getKernelOperandValues());
- builder.create<gpu::LaunchFuncOp>(
+ auto launchFuncOp = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
launchOp.getBlockSizeOperandValues(), kernelOperandValues);
+ inlineConstants(kernelFunc, launchFuncOp);
launchOp.erase();
}
OpenPOWER on IntegriCloud