summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alex Zinenko <zinenko@google.com> 2019-12-06 14:28:54 -0800
committer A. Unique TensorFlower <gardener@tensorflow.org> 2019-12-06 14:29:25 -0800
commit e96150eb46d8c381f11a7333f0384aad0fc8d1b6 (patch)
tree fb1a8ad40a7ddec80aadfa14d20759ea3ff879d0
parent 883f5557263e5543c2a8cf7621be885760b47d5d (diff)
download bcm5719-llvm-e96150eb46d8c381f11a7333f0384aad0fc8d1b6.tar.gz
bcm5719-llvm-e96150eb46d8c381f11a7333f0384aad0fc8d1b6.zip
Replace custom getBody method with an ODS-generated one in gpu::LaunchOp
PiperOrigin-RevId: 284262981
-rw-r--r-- mlir/include/mlir/Dialect/GPU/GPUOps.td 3
-rw-r--r-- mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp 14
-rw-r--r-- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp 38
-rw-r--r-- mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp 2
4 files changed, 26 insertions, 31 deletions
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 9b4e21800bd..a64625f9231 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -275,9 +275,6 @@ def GPU_LaunchOp : GPU_Op<"launch", [IsolatedFromAbove]>,
let hasCanonicalizer = 1;
let extraClassDeclaration = [{
- /// Get the kernel region.
- Region &getBody();
-
/// Get the SSA values corresponding to kernel block identifiers.
KernelDim3 getBlockIds();
/// Get the SSA values corresponding to kernel thread identifiers.
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index eb0b0d5931a..c269dc5c45a 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -313,7 +313,7 @@ LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp,
gpu::LaunchOp launchOp, unsigned numBlockDims,
unsigned numThreadDims) {
OpBuilder::InsertionGuard bodyInsertionGuard(builder);
- builder.setInsertionPointToEnd(&launchOp.getBody().front());
+ builder.setInsertionPointToEnd(&launchOp.body().front());
auto returnOp = builder.create<gpu::ReturnOp>(launchOp.getLoc());
rootForOp.getOperation()->moveBefore(returnOp);
@@ -389,7 +389,7 @@ LogicalResult createLaunchFromOp(OpTy rootForOp,
llvm::zip_first(valuesToForward, launchOp.getKernelArguments())) {
Value *from = std::get<0>(pair);
Value *to = std::get<1>(pair);
- replaceAllUsesInRegionWith(from, to, launchOp.getBody());
+ replaceAllUsesInRegionWith(from, to, launchOp.body());
}
return success();
}
@@ -444,15 +444,15 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
terminator.erase();
builder.setInsertionPointToEnd(innermostForOp.getBody());
builder.create<gpu::ReturnOp>(terminatorLoc);
- launchOp.getBody().front().getOperations().splice(
- launchOp.getBody().front().begin(),
+ launchOp.body().front().getOperations().splice(
+ launchOp.body().front().begin(),
innermostForOp.getBody()->getOperations());
// Remap the loop iterators to use block/thread identifiers instead. Loops
// may iterate from LB with step S whereas GPU thread/block ids always iterate
// from 0 to N with step 1. Therefore, loop induction variables are replaced
// with (gpu-thread/block-id * S) + LB.
- builder.setInsertionPointToStart(&launchOp.getBody().front());
+ builder.setInsertionPointToStart(&launchOp.body().front());
auto lbArgumentIt = std::next(launchOp.getKernelArguments().begin(),
originallyForwardedValues);
auto stepArgumentIt = std::next(lbArgumentIt, lbs.size());
@@ -469,7 +469,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
builder.create<AddIOp>(rootForOp.getLoc(), *lbArgumentIt, id);
en.value()->replaceAllUsesWith(ivReplacement);
replaceAllUsesInRegionWith(steps[en.index()], *stepArgumentIt,
- launchOp.getBody());
+ launchOp.body());
std::advance(lbArgumentIt, 1);
std::advance(stepArgumentIt, 1);
}
@@ -481,7 +481,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
llvm::zip_first(valuesToForward, launchOp.getKernelArguments())) {
Value *from = std::get<0>(pair);
Value *to = std::get<1>(pair);
- replaceAllUsesInRegionWith(from, to, launchOp.getBody());
+ replaceAllUsesInRegionWith(from, to, launchOp.body());
}
// We are done and can erase the original outermost loop.
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 87b170b6da8..95775f588b1 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -197,29 +197,27 @@ void LaunchOp::build(Builder *builder, OperationState &result, Value *gridSizeX,
kernelRegion->push_back(body);
}
-Region &LaunchOp::getBody() { return getOperation()->getRegion(0); }
-
KernelDim3 LaunchOp::getBlockIds() {
- assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty.");
- auto args = getBody().getBlocks().front().getArguments();
+ assert(!body().getBlocks().empty() && "FuncOp body must not be empty.");
+ auto args = body().getBlocks().front().getArguments();
return KernelDim3{args[0], args[1], args[2]};
}
KernelDim3 LaunchOp::getThreadIds() {
- assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty.");
- auto args = getBody().getBlocks().front().getArguments();
+ assert(!body().getBlocks().empty() && "FuncOp body must not be empty.");
+ auto args = body().getBlocks().front().getArguments();
return KernelDim3{args[3], args[4], args[5]};
}
KernelDim3 LaunchOp::getGridSize() {
- assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty.");
- auto args = getBody().getBlocks().front().getArguments();
+ assert(!body().getBlocks().empty() && "FuncOp body must not be empty.");
+ auto args = body().getBlocks().front().getArguments();
return KernelDim3{args[6], args[7], args[8]};
}
KernelDim3 LaunchOp::getBlockSize() {
- assert(!getBody().getBlocks().empty() && "FuncOp body must not be empty.");
- auto args = getBody().getBlocks().front().getArguments();
+ assert(!body().getBlocks().empty() && "FuncOp body must not be empty.");
+ auto args = body().getBlocks().front().getArguments();
return KernelDim3{args[9], args[10], args[11]};
}
@@ -240,7 +238,7 @@ KernelDim3 LaunchOp::getBlockSizeOperandValues() {
}
llvm::iterator_range<Block::args_iterator> LaunchOp::getKernelArguments() {
- auto args = getBody().getBlocks().front().getArguments();
+ auto args = body().getBlocks().front().getArguments();
return llvm::drop_begin(args, LaunchOp::kNumConfigRegionAttributes);
}
@@ -248,8 +246,8 @@ LogicalResult verify(LaunchOp op) {
// Kernel launch takes kNumConfigOperands leading operands for grid/block
// sizes and transforms them into kNumConfigRegionAttributes region arguments
// for block/thread identifiers and grid/block sizes.
- if (!op.getBody().empty()) {
- Block &entryBlock = op.getBody().front();
+ if (!op.body().empty()) {
+ Block &entryBlock = op.body().front();
if (entryBlock.getNumArguments() !=
LaunchOp::kNumConfigOperands + op.getNumOperands())
return op.emitOpError("unexpected number of region arguments");
@@ -257,7 +255,7 @@ LogicalResult verify(LaunchOp op) {
// Block terminators without successors are expected to exit the kernel region
// and must be `gpu.launch`.
- for (Block &block : op.getBody()) {
+ for (Block &block : op.body()) {
if (block.empty())
continue;
if (block.back().getNumSuccessors() != 0)
@@ -304,13 +302,13 @@ void printLaunchOp(OpAsmPrinter &p, LaunchOp op) {
operands = operands.drop_front(LaunchOp::kNumConfigOperands);
// Print the data argument remapping.
- if (!op.getBody().empty() && !operands.empty()) {
+ if (!op.body().empty() && !operands.empty()) {
p << ' ' << op.getArgsKeyword() << '(';
for (unsigned i = 0, e = operands.size(); i < e; ++i) {
if (i != 0)
p << ", ";
- p << *op.getBody().front().getArgument(
- LaunchOp::kNumConfigRegionAttributes + i)
+ p << *op.body().front().getArgument(LaunchOp::kNumConfigRegionAttributes +
+ i)
<< " = " << *operands[i];
}
p << ") ";
@@ -326,7 +324,7 @@ void printLaunchOp(OpAsmPrinter &p, LaunchOp op) {
}
}
- p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false);
+ p.printRegion(op.body(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict(op.getAttrs());
}
@@ -438,7 +436,7 @@ ParseResult parseLaunchOp(OpAsmParser &parser, OperationState &result) {
}
void LaunchOp::eraseKernelArgument(unsigned index) {
- Block &entryBlock = getBody().front();
+ Block &entryBlock = body().front();
assert(index < entryBlock.getNumArguments() - kNumConfigRegionAttributes &&
"kernel argument index overflow");
entryBlock.eraseArgument(kNumConfigRegionAttributes + index);
@@ -453,7 +451,7 @@ class PropagateConstantBounds : public OpRewritePattern<LaunchOp> {
PatternMatchResult matchAndRewrite(LaunchOp launchOp,
PatternRewriter &rewriter) const override {
auto origInsertionPoint = rewriter.saveInsertionPoint();
- rewriter.setInsertionPointToStart(&launchOp.getBody().front());
+ rewriter.setInsertionPointToStart(&launchOp.body().front());
// Traverse operands passed to kernel and check if some of them are known
// constants. If so, clone the constant operation inside the kernel region
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 81d585219a1..fbcd18b9535 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -115,7 +115,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
std::string kernelFuncName =
Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str();
FuncOp outlinedFunc = FuncOp::create(loc, kernelFuncName, type);
- outlinedFunc.getBody().takeBody(launchOp.getBody());
+ outlinedFunc.getBody().takeBody(launchOp.body());
Builder builder(launchOp.getContext());
outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
builder.getUnitAttr());
OpenPOWER on IntegriCloud