summary refs log tree commit diff stats
path: root/mlir/lib/Dialect/GPU/Transforms
diff options
context:
space:
mode:
author Stephan Herhut <herhut@google.com> 2019-11-20 02:59:02 -0800
committer A. Unique TensorFlower <gardener@tensorflow.org> 2019-11-20 02:59:35 -0800
commit abb626686d47ef3809ad800f8a25e68c56e79a7e (patch)
tree a9e3687a8095f09fda547734ce4781a267c2e9ef /mlir/lib/Dialect/GPU/Transforms
parent 88368a19aa5b13b9a08f187a99ed0d04d17be653 (diff)
download bcm5719-llvm-abb626686d47ef3809ad800f8a25e68c56e79a7e.tar.gz
bcm5719-llvm-abb626686d47ef3809ad800f8a25e68c56e79a7e.zip
Extend kernel outlining to also consider `dim` operations worth inlining.
PiperOrigin-RevId: 281483447
Diffstat (limited to 'mlir/lib/Dialect/GPU/Transforms')
-rw-r--r-- mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp 30
1 file changed, 21 insertions, 9 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 420b2340bc9..235a74ba1c3 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -56,22 +56,34 @@ static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
}
}
-// Move all constant arguments of the given kernel function into the function,
-// thereby reducing the number of kernel arguments.
-static gpu::LaunchFuncOp inlineConstants(FuncOp kernelFunc,
- gpu::LaunchFuncOp launch) {
+static bool isInliningBeneficiary(Operation *op) {
+ return isa<ConstantOp>(op) || isa<DimOp>(op);
+}
+
+// Move arguments of the given kernel function into the function if this reduces
+// the number of kernel arguments.
+static gpu::LaunchFuncOp inlineBeneficiaryOps(FuncOp kernelFunc,
+ gpu::LaunchFuncOp launch) {
OpBuilder kernelBuilder(kernelFunc.getBody());
auto &firstBlock = kernelFunc.getBody().front();
llvm::SmallVector<Value *, 8> newLaunchArgs;
+ BlockAndValueMapping map;
+ for (int i = 0, e = launch.getNumKernelOperands(); i < e; ++i) {
+ map.map(launch.getKernelOperand(i), kernelFunc.getArgument(i));
+ }
for (int i = launch.getNumKernelOperands() - 1; i >= 0; --i) {
auto operandOp = launch.getKernelOperand(i)->getDefiningOp();
- auto constant = dyn_cast_or_null<ConstantOp>(operandOp);
- if (!constant) {
+ if (!operandOp || !isInliningBeneficiary(operandOp)) {
newLaunchArgs.push_back(launch.getKernelOperand(i));
continue;
}
- auto newConstant = kernelBuilder.clone(*operandOp);
- firstBlock.getArgument(i)->replaceAllUsesWith(newConstant->getResult(0));
+ // Only inline operations that do not create new arguments.
+ if (!llvm::all_of(operandOp->getOperands(),
+ [map](Value *value) { return map.contains(value); })) {
+ continue;
+ }
+ auto clone = kernelBuilder.clone(*operandOp, map);
+ firstBlock.getArgument(i)->replaceAllUsesWith(clone->getResult(0));
firstBlock.eraseArgument(i);
}
if (newLaunchArgs.size() == launch.getNumKernelOperands())
@@ -125,7 +137,7 @@ static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
auto launchFuncOp = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
launchOp.getBlockSizeOperandValues(), kernelOperandValues);
- inlineConstants(kernelFunc, launchFuncOp);
+ inlineBeneficiaryOps(kernelFunc, launchFuncOp);
launchOp.erase();
}
OpenPOWER on IntegriCloud