author     Alex Zinenko <zinenko@google.com>                 2019-11-25 07:59:52 -0800
committer  A. Unique TensorFlower <gardener@tensorflow.org>  2019-11-25 08:10:37 -0800
commit     bf4692dc49728f9baaff5ed25a00a46b43988875 (patch)
tree       83fd9463aaeb2985540e7c5e4b50dd556047d5cb /mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
parent     d2284f1f0ba937ed0da8996957eb3e4557243f64 (diff)
Introduce gpu.func
Introduce a new function-like operation to the GPU dialect to serve as a placeholder for describing execution semantics and to support modeling the GPU memory hierarchy. This aligns with the overall goal of the dialect to expose a common abstraction layer for GPU devices, in particular by providing an MLIR unit of semantics (i.e. an operation) for memory modeling.

This proposal has been discussed on the mailing list:
https://groups.google.com/a/tensorflow.org/d/msg/mlir/RfXNP7Hklsc/MBNN7KhjAgAJ

As decided there, the "convergence" aspect of the execution model will be factored out into a separate discussion and is therefore not included in this commit.

This commit only introduces the operation and does not hook it up with the remaining flow. The intention is to develop the new flow while keeping the old flow operational, and to do the switch in a simple, separately reversible commit.

PiperOrigin-RevId: 282357599
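For illustration, a minimal sketch of the textual form accepted by the parser added in this diff. The symbol names, memory-space numbers, and the gpu.return terminator are assumptions made for the example, not part of this commit:

    gpu.func @kernel_example(%arg0: f32, %arg1: memref<?xf32>)
        workgroup(%workgroup : memref<32xf32, 3>)
        private(%private : memref<1xf32, 5>)
        kernel {
      // Function arguments, workgroup attributions, and private attributions
      // are all visible as entry block arguments inside the body.
      gpu.return
    }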
Diffstat (limited to 'mlir/lib/Dialect/GPU/IR/GPUDialect.cpp')
-rw-r--r--  mlir/lib/Dialect/GPU/IR/GPUDialect.cpp  201
1 file changed, 195 insertions(+), 6 deletions(-)
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index bfd094d6203..5fc1cade760 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -45,7 +45,7 @@ bool GPUDialect::isKernel(Operation *op) {
GPUDialect::GPUDialect(MLIRContext *context)
: Dialect(getDialectName(), context) {
- addOperations<LaunchOp, LaunchFuncOp,
+ addOperations<LaunchOp, LaunchFuncOp, GPUFuncOp,
#define GET_OP_LIST
#include "mlir/Dialect/GPU/GPUOps.cpp.inc"
>();
@@ -93,7 +93,7 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
// Check that `launch_func` refers to a well-formed kernel function.
StringRef kernelName = launchOp.kernel();
Operation *kernelFunc = kernelModule.lookupSymbol(kernelName);
- auto kernelStdFunction = dyn_cast_or_null<FuncOp>(kernelFunc);
+ auto kernelStdFunction = dyn_cast_or_null<::mlir::FuncOp>(kernelFunc);
auto kernelLLVMFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(kernelFunc);
if (!kernelStdFunction && !kernelLLVMFunction)
return launchOp.emitOpError("kernel function '")
@@ -501,9 +501,10 @@ void LaunchOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
//===----------------------------------------------------------------------===//
void LaunchFuncOp::build(Builder *builder, OperationState &result,
- FuncOp kernelFunc, Value *gridSizeX, Value *gridSizeY,
- Value *gridSizeZ, Value *blockSizeX, Value *blockSizeY,
- Value *blockSizeZ, ArrayRef<Value *> kernelOperands) {
+ ::mlir::FuncOp kernelFunc, Value *gridSizeX,
+ Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
+ Value *blockSizeY, Value *blockSizeZ,
+ ArrayRef<Value *> kernelOperands) {
// Add grid and block sizes as op operands, followed by the data operands.
result.addOperands(
{gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
@@ -517,7 +518,7 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
}
void LaunchFuncOp::build(Builder *builder, OperationState &result,
- FuncOp kernelFunc, KernelDim3 gridSize,
+ ::mlir::FuncOp kernelFunc, KernelDim3 gridSize,
KernelDim3 blockSize,
ArrayRef<Value *> kernelOperands) {
build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z,
@@ -572,3 +573,191 @@ LogicalResult LaunchFuncOp::verify() {
return success();
}
+
+//===----------------------------------------------------------------------===//
+// GPUFuncOp
+//===----------------------------------------------------------------------===//
+
+void GPUFuncOp::build(Builder *builder, OperationState &result, StringRef name,
+ FunctionType type, ArrayRef<Type> workgroupAttributions,
+ ArrayRef<Type> privateAttributions,
+ ArrayRef<NamedAttribute> attrs) {
+ result.addAttribute(SymbolTable::getSymbolAttrName(),
+ builder->getStringAttr(name));
+ result.addAttribute(getTypeAttrName(), TypeAttr::get(type));
+ result.addAttribute(getNumWorkgroupAttributionsAttrName(),
+ builder->getI64IntegerAttr(workgroupAttributions.size()));
+ result.addAttributes(attrs);
+ Region *body = result.addRegion();
+ Block *entryBlock = new Block;
+ entryBlock->addArguments(type.getInputs());
+ entryBlock->addArguments(workgroupAttributions);
+ entryBlock->addArguments(privateAttributions);
+
+ body->getBlocks().push_back(entryBlock);
+}
+
+/// Parses a GPU function memory attribution.
+///
+/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
+/// (`private` `(` ssa-id-and-type-list `)`)?
+///
+/// Note that this function parses only one of the two similar parts, with the
+/// keyword provided as argument.
+static ParseResult
+parseAttributions(OpAsmParser &parser, StringRef keyword,
+ SmallVectorImpl<OpAsmParser::OperandType> &args,
+ SmallVectorImpl<Type> &argTypes) {
+  // If we could not parse the keyword, just assume an empty list and succeed.
+ if (failed(parser.parseOptionalKeyword(keyword)))
+ return success();
+
+ if (failed(parser.parseLParen()))
+ return failure();
+
+ // Early exit for an empty list.
+ if (succeeded(parser.parseOptionalRParen()))
+ return success();
+
+ do {
+ OpAsmParser::OperandType arg;
+ Type type;
+
+ if (parser.parseRegionArgument(arg) || parser.parseColonType(type))
+ return failure();
+
+ args.push_back(arg);
+ argTypes.push_back(type);
+ } while (succeeded(parser.parseOptionalComma()));
+
+ return parser.parseRParen();
+}
+
+/// Parses a GPU function.
+///
+/// <operation> ::= `gpu.func` symbol-ref-id `(` argument-list `)`
+/// (`->` function-result-list)? memory-attribution `kernel`?
+/// function-attributes? region
+ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
+ SmallVector<OpAsmParser::OperandType, 8> entryArgs;
+ SmallVector<SmallVector<NamedAttribute, 2>, 1> argAttrs;
+ SmallVector<SmallVector<NamedAttribute, 2>, 1> resultAttrs;
+ SmallVector<Type, 8> argTypes;
+ SmallVector<Type, 4> resultTypes;
+ bool isVariadic;
+
+ // Parse the function name.
+ StringAttr nameAttr;
+ if (parser.parseSymbolName(nameAttr, ::mlir::SymbolTable::getSymbolAttrName(),
+ result.attributes))
+ return failure();
+
+ auto signatureLocation = parser.getCurrentLocation();
+ if (failed(impl::parseFunctionSignature(
+ parser, /*allowVariadic=*/false, entryArgs, argTypes, argAttrs,
+ isVariadic, resultTypes, resultAttrs)))
+ return failure();
+
+ if (entryArgs.empty() && !argTypes.empty())
+ return parser.emitError(signatureLocation)
+ << "gpu.func requires named arguments";
+
+ // Construct the function type. More types will be added to the region, but
+  // not to the function type.
+ Builder &builder = parser.getBuilder();
+ auto type = builder.getFunctionType(argTypes, resultTypes);
+ result.addAttribute(getTypeAttrName(), TypeAttr::get(type));
+
+ // Parse workgroup memory attributions.
+ if (failed(parseAttributions(parser, getWorkgroupKeyword(), entryArgs,
+ argTypes)))
+ return failure();
+
+ // Store the number of operands we just parsed as the number of workgroup
+ // memory attributions.
+ unsigned numWorkgroupAttrs = argTypes.size() - type.getNumInputs();
+ result.addAttribute(getNumWorkgroupAttributionsAttrName(),
+ builder.getI64IntegerAttr(numWorkgroupAttrs));
+
+ // Parse private memory attributions.
+ if (failed(
+ parseAttributions(parser, getPrivateKeyword(), entryArgs, argTypes)))
+ return failure();
+
+ // Parse the kernel attribute if present.
+ if (succeeded(parser.parseOptionalKeyword(getKernelKeyword())))
+ result.addAttribute(GPUDialect::getKernelFuncAttrName(),
+ builder.getUnitAttr());
+
+ // Parse attributes.
+ if (failed(parser.parseOptionalAttrDictWithKeyword(result.attributes)))
+ return failure();
+ mlir::impl::addArgAndResultAttrs(builder, result, argAttrs, resultAttrs);
+
+ // Parse the region. If no argument names were provided, take all names
+ // (including those of attributions) from the entry block.
+ auto *body = result.addRegion();
+ return parser.parseRegion(*body, entryArgs, argTypes);
+}
+
+static void printAttributions(OpAsmPrinter &p, StringRef keyword,
+ ArrayRef<BlockArgument *> values) {
+ if (values.empty())
+ return;
+
+ p << ' ' << keyword << '(';
+ interleaveComma(values, p.getStream(),
+ [&p](BlockArgument *v) { p << *v << " : " << v->getType(); });
+ p << ')';
+}
+
+void GPUFuncOp::print(OpAsmPrinter &p) {
+ p << getOperationName() << ' ';
+ p.printSymbolName(getName());
+
+ FunctionType type = getType();
+ impl::printFunctionSignature(p, this->getOperation(), type.getInputs(),
+ /*isVariadic=*/false, type.getResults());
+
+ printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
+ printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
+ if (isKernel())
+ p << ' ' << getKernelKeyword();
+
+ impl::printFunctionAttributes(p, this->getOperation(), type.getNumInputs(),
+ type.getNumResults(),
+ {getNumWorkgroupAttributionsAttrName(),
+ GPUDialect::getKernelFuncAttrName()});
+ p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
+}
+
+/// Hook for FunctionLike verifier.
+LogicalResult GPUFuncOp::verifyType() {
+ Type type = getTypeAttr().getValue();
+ if (!type.isa<FunctionType>())
+ return emitOpError("requires '" + getTypeAttrName() +
+ "' attribute of function type");
+ return success();
+}
+
+/// Verifies the body of the function.
+LogicalResult GPUFuncOp::verifyBody() {
+ unsigned numFuncArguments = getNumArguments();
+ unsigned numWorkgroupAttributions = getNumWorkgroupAttributions();
+ unsigned numBlockArguments = front().getNumArguments();
+ if (numBlockArguments < numFuncArguments + numWorkgroupAttributions)
+ return emitOpError() << "expected at least "
+ << numFuncArguments + numWorkgroupAttributions
+ << " arguments to body region";
+
+ ArrayRef<Type> funcArgTypes = getType().getInputs();
+ for (unsigned i = 0; i < numFuncArguments; ++i) {
+ Type blockArgType = front().getArgument(i)->getType();
+ if (funcArgTypes[i] != blockArgType)
+ return emitOpError() << "expected body region argument #" << i
+ << " to be of type " << funcArgTypes[i] << ", got "
+ << blockArgType;
+ }
+
+ return success();
+}
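As a reference for the invariant enforced by verifyBody above, the entry block of a gpu.func carries the function arguments first, then the workgroup attributions, then the private attributions. A hypothetical sketch, with names and memory-space numbers chosen for illustration and gpu.return assumed as the terminator:

    gpu.func @layout(%arg0: f32, %arg1: memref<?xf32>)
        workgroup(%wg : memref<32xf32, 3>)
        private(%priv : memref<1xf32, 5>) {
      // Entry block argument order checked by the verifier:
      //   #0, #1 : %arg0, %arg1 - function arguments; types must match the function type
      //   #2     : %wg          - workgroup attribution (counted by the workgroup-attribution count attribute)
      //   #3     : %priv        - private attribution
      gpu.return
    }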