diff options
| author | Stephan Herhut <herhut@google.com> | 2019-11-20 02:59:02 -0800 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-11-20 02:59:35 -0800 |
| commit | abb626686d47ef3809ad800f8a25e68c56e79a7e (patch) | |
| tree | a9e3687a8095f09fda547734ce4781a267c2e9ef /mlir/lib/Dialect/GPU/Transforms | |
| parent | 88368a19aa5b13b9a08f187a99ed0d04d17be653 (diff) | |
| download | bcm5719-llvm-abb626686d47ef3809ad800f8a25e68c56e79a7e.tar.gz bcm5719-llvm-abb626686d47ef3809ad800f8a25e68c56e79a7e.zip | |
Extend kernel outlining to also consider `dim` operations worth inlining.
PiperOrigin-RevId: 281483447
Diffstat (limited to 'mlir/lib/Dialect/GPU/Transforms')
| -rw-r--r-- | mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 30 |
1 files changed, 21 insertions, 9 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index 420b2340bc9..235a74ba1c3 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -56,22 +56,34 @@ static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) { } } -// Move all constant arguments of the given kernel function into the function, -// thereby reducing the number of kernel arguments. -static gpu::LaunchFuncOp inlineConstants(FuncOp kernelFunc, - gpu::LaunchFuncOp launch) { +static bool isInliningBeneficiary(Operation *op) { + return isa<ConstantOp>(op) || isa<DimOp>(op); +} + +// Move arguments of the given kernel function into the function if this reduces +// the number of kernel arguments. +static gpu::LaunchFuncOp inlineBeneficiaryOps(FuncOp kernelFunc, + gpu::LaunchFuncOp launch) { OpBuilder kernelBuilder(kernelFunc.getBody()); auto &firstBlock = kernelFunc.getBody().front(); llvm::SmallVector<Value *, 8> newLaunchArgs; + BlockAndValueMapping map; + for (int i = 0, e = launch.getNumKernelOperands(); i < e; ++i) { + map.map(launch.getKernelOperand(i), kernelFunc.getArgument(i)); + } for (int i = launch.getNumKernelOperands() - 1; i >= 0; --i) { auto operandOp = launch.getKernelOperand(i)->getDefiningOp(); - auto constant = dyn_cast_or_null<ConstantOp>(operandOp); - if (!constant) { + if (!operandOp || !isInliningBeneficiary(operandOp)) { newLaunchArgs.push_back(launch.getKernelOperand(i)); continue; } - auto newConstant = kernelBuilder.clone(*operandOp); - firstBlock.getArgument(i)->replaceAllUsesWith(newConstant->getResult(0)); + // Only inline operations that do not create new arguments. 
+ if (!llvm::all_of(operandOp->getOperands(), + [map](Value *value) { return map.contains(value); })) { + continue; + } + auto clone = kernelBuilder.clone(*operandOp, map); + firstBlock.getArgument(i)->replaceAllUsesWith(clone->getResult(0)); firstBlock.eraseArgument(i); } if (newLaunchArgs.size() == launch.getNumKernelOperands()) @@ -125,7 +137,7 @@ static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) { auto launchFuncOp = builder.create<gpu::LaunchFuncOp>( launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(), launchOp.getBlockSizeOperandValues(), kernelOperandValues); - inlineConstants(kernelFunc, launchFuncOp); + inlineBeneficiaryOps(kernelFunc, launchFuncOp); launchOp.erase(); } |

