diff options
author | Alex Zinenko <zinenko@google.com> | 2019-12-06 14:28:54 -0800 |
---|---|---|
committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-12-06 14:29:25 -0800 |
commit | e96150eb46d8c381f11a7333f0384aad0fc8d1b6 (patch) | |
tree | fb1a8ad40a7ddec80aadfa14d20759ea3ff879d0 | |
parent | 883f5557263e5543c2a8cf7621be885760b47d5d (diff) | |
download | bcm5719-llvm-e96150eb46d8c381f11a7333f0384aad0fc8d1b6.tar.gz bcm5719-llvm-e96150eb46d8c381f11a7333f0384aad0fc8d1b6.zip |
Replace custom getBody method with an ODS-generated one in gpu::LaunchOp
PiperOrigin-RevId: 284262981
-rw-r--r-- | mlir/include/mlir/Dialect/GPU/GPUOps.td | 3 | ||||
-rw-r--r-- | mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp | 14 | ||||
-rw-r--r-- | mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 38 | ||||
-rw-r--r-- | mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 2 |
4 files changed, 26 insertions, 31 deletions
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index 9b4e21800bd..a64625f9231 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -275,9 +275,6 @@ def GPU_LaunchOp : GPU_Op<"launch", [IsolatedFromAbove]>, let hasCanonicalizer = 1; let extraClassDeclaration = [{ - /// Get the kernel region. - Region &getBody(); - /// Get the SSA values corresponding to kernel block identifiers. KernelDim3 getBlockIds(); /// Get the SSA values corresponding to kernel thread identifiers. diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp index eb0b0d5931a..c269dc5c45a 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -313,7 +313,7 @@ LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp, gpu::LaunchOp launchOp, unsigned numBlockDims, unsigned numThreadDims) { OpBuilder::InsertionGuard bodyInsertionGuard(builder); - builder.setInsertionPointToEnd(&launchOp.getBody().front()); + builder.setInsertionPointToEnd(&launchOp.body().front()); auto returnOp = builder.create<gpu::ReturnOp>(launchOp.getLoc()); rootForOp.getOperation()->moveBefore(returnOp); @@ -389,7 +389,7 @@ LogicalResult createLaunchFromOp(OpTy rootForOp, llvm::zip_first(valuesToForward, launchOp.getKernelArguments())) { Value *from = std::get<0>(pair); Value *to = std::get<1>(pair); - replaceAllUsesInRegionWith(from, to, launchOp.getBody()); + replaceAllUsesInRegionWith(from, to, launchOp.body()); } return success(); } @@ -444,15 +444,15 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, terminator.erase(); builder.setInsertionPointToEnd(innermostForOp.getBody()); builder.create<gpu::ReturnOp>(terminatorLoc); - launchOp.getBody().front().getOperations().splice( - launchOp.getBody().front().begin(), + launchOp.body().front().getOperations().splice( + 
launchOp.body().front().begin(), innermostForOp.getBody()->getOperations()); // Remap the loop iterators to use block/thread identifiers instead. Loops // may iterate from LB with step S whereas GPU thread/block ids always iterate // from 0 to N with step 1. Therefore, loop induction variables are replaced // with (gpu-thread/block-id * S) + LB. - builder.setInsertionPointToStart(&launchOp.getBody().front()); + builder.setInsertionPointToStart(&launchOp.body().front()); auto lbArgumentIt = std::next(launchOp.getKernelArguments().begin(), originallyForwardedValues); auto stepArgumentIt = std::next(lbArgumentIt, lbs.size()); @@ -469,7 +469,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, builder.create<AddIOp>(rootForOp.getLoc(), *lbArgumentIt, id); en.value()->replaceAllUsesWith(ivReplacement); replaceAllUsesInRegionWith(steps[en.index()], *stepArgumentIt, - launchOp.getBody()); + launchOp.body()); std::advance(lbArgumentIt, 1); std::advance(stepArgumentIt, 1); } @@ -481,7 +481,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, llvm::zip_first(valuesToForward, launchOp.getKernelArguments())) { Value *from = std::get<0>(pair); Value *to = std::get<1>(pair); - replaceAllUsesInRegionWith(from, to, launchOp.getBody()); + replaceAllUsesInRegionWith(from, to, launchOp.body()); } // We are done and can erase the original outermost loop. 
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 87b170b6da8..95775f588b1 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -197,29 +197,27 @@ void LaunchOp::build(Builder *builder, OperationState &result, Value *gridSizeX, kernelRegion->push_back(body); } -Region &LaunchOp::getBody() { return getOperation()->getRegion(0); } - KernelDim3 LaunchOp::getBlockIds() { - assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty."); - auto args = getBody().getBlocks().front().getArguments(); + assert(!body().getBlocks().empty() && "FuncOp body must not be empty."); + auto args = body().getBlocks().front().getArguments(); return KernelDim3{args[0], args[1], args[2]}; } KernelDim3 LaunchOp::getThreadIds() { - assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty."); - auto args = getBody().getBlocks().front().getArguments(); + assert(!body().getBlocks().empty() && "FuncOp body must not be empty."); + auto args = body().getBlocks().front().getArguments(); return KernelDim3{args[3], args[4], args[5]}; } KernelDim3 LaunchOp::getGridSize() { - assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty."); - auto args = getBody().getBlocks().front().getArguments(); + assert(!body().getBlocks().empty() && "FuncOp body must not be empty."); + auto args = body().getBlocks().front().getArguments(); return KernelDim3{args[6], args[7], args[8]}; } KernelDim3 LaunchOp::getBlockSize() { - assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty."); - auto args = getBody().getBlocks().front().getArguments(); + assert(!body().getBlocks().empty() && "FuncOp body must not be empty."); + auto args = body().getBlocks().front().getArguments(); return KernelDim3{args[9], args[10], args[11]}; } @@ -240,7 +238,7 @@ KernelDim3 LaunchOp::getBlockSizeOperandValues() { } llvm::iterator_range<Block::args_iterator> LaunchOp::getKernelArguments() 
{ - auto args = getBody().getBlocks().front().getArguments(); + auto args = body().getBlocks().front().getArguments(); return llvm::drop_begin(args, LaunchOp::kNumConfigRegionAttributes); } @@ -248,8 +246,8 @@ LogicalResult verify(LaunchOp op) { // Kernel launch takes kNumConfigOperands leading operands for grid/block // sizes and transforms them into kNumConfigRegionAttributes region arguments // for block/thread identifiers and grid/block sizes. - if (!op.getBody().empty()) { - Block &entryBlock = op.getBody().front(); + if (!op.body().empty()) { + Block &entryBlock = op.body().front(); if (entryBlock.getNumArguments() != LaunchOp::kNumConfigOperands + op.getNumOperands()) return op.emitOpError("unexpected number of region arguments"); @@ -257,7 +255,7 @@ LogicalResult verify(LaunchOp op) { // Block terminators without successors are expected to exit the kernel region // and must be `gpu.launch`. - for (Block &block : op.getBody()) { + for (Block &block : op.body()) { if (block.empty()) continue; if (block.back().getNumSuccessors() != 0) @@ -304,13 +302,13 @@ void printLaunchOp(OpAsmPrinter &p, LaunchOp op) { operands = operands.drop_front(LaunchOp::kNumConfigOperands); // Print the data argument remapping. 
- if (!op.getBody().empty() && !operands.empty()) { + if (!op.body().empty() && !operands.empty()) { p << ' ' << op.getArgsKeyword() << '('; for (unsigned i = 0, e = operands.size(); i < e; ++i) { if (i != 0) p << ", "; - p << *op.getBody().front().getArgument( - LaunchOp::kNumConfigRegionAttributes + i) + p << *op.body().front().getArgument(LaunchOp::kNumConfigRegionAttributes + + i) << " = " << *operands[i]; } p << ") "; @@ -326,7 +324,7 @@ void printLaunchOp(OpAsmPrinter &p, LaunchOp op) { } } - p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false); + p.printRegion(op.body(), /*printEntryBlockArgs=*/false); p.printOptionalAttrDict(op.getAttrs()); } @@ -438,7 +436,7 @@ ParseResult parseLaunchOp(OpAsmParser &parser, OperationState &result) { } void LaunchOp::eraseKernelArgument(unsigned index) { - Block &entryBlock = getBody().front(); + Block &entryBlock = body().front(); assert(index < entryBlock.getNumArguments() - kNumConfigRegionAttributes && "kernel argument index overflow"); entryBlock.eraseArgument(kNumConfigRegionAttributes + index); @@ -453,7 +451,7 @@ class PropagateConstantBounds : public OpRewritePattern<LaunchOp> { PatternMatchResult matchAndRewrite(LaunchOp launchOp, PatternRewriter &rewriter) const override { auto origInsertionPoint = rewriter.saveInsertionPoint(); - rewriter.setInsertionPointToStart(&launchOp.getBody().front()); + rewriter.setInsertionPointToStart(&launchOp.body().front()); // Traverse operands passed to kernel and check if some of them are known // constants. 
If so, clone the constant operation inside the kernel region diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index 81d585219a1..fbcd18b9535 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -115,7 +115,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) { std::string kernelFuncName = Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str(); FuncOp outlinedFunc = FuncOp::create(loc, kernelFuncName, type); - outlinedFunc.getBody().takeBody(launchOp.getBody()); + outlinedFunc.getBody().takeBody(launchOp.body()); Builder builder(launchOp.getContext()); outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(), builder.getUnitAttr()); |