path: root/mlir/lib
author     Uday Bondhugula <bondhugula@google.com>   2018-10-12 14:54:54 -0700
committer  jpienaar <jpienaar@google.com>            2019-03-29 13:29:21 -0700
commit     86eac4618c06a54c1f6d95a8c9d94b15dda5e35b (patch)
tree       a6574eb104b867e24814319c44b26d953527f4c2 /mlir/lib
parent     9e3b928e32285bf47d366d5685d2c65e616544cb (diff)
download   bcm5719-llvm-86eac4618c06a54c1f6d95a8c9d94b15dda5e35b.tar.gz
           bcm5719-llvm-86eac4618c06a54c1f6d95a8c9d94b15dda5e35b.zip
Create private exclusive / single use affine computation slice for an op stmt.
- Add a util to create a private / exclusive / single use affine computation slice for an op stmt (see method doc comment); a single multi-result affine_apply op is prepended to the op stmt to provide all results needed for its operands as a function of loop iterators and symbols.
- Use it for DMA pipelining (to create private slices for DMA start stmt's); resolves TODOs / feature request (b/117159533).
- Move createComposedAffineApplyOp to Transforms/Utils; free it from taking a memref as input / generalize it.

PiperOrigin-RevId: 216926818
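As a rough illustration (not part of the commit), a minimal sketch of how a pass in this revision can use the new utility, mirroring the PipelineDataTransfer.cpp change in the diff below. The types and helpers used here (OperationStmt, DmaStartOp, getReachableAffineApplyOps, the opDelayMap shift map) follow the pre-LLVM-merge MLIR API as it appears in this diff:

    // Give each DMA start its own private affine computation slice, and assign
    // both the DMA start and its slice a shift/delay of 0 so that they are
    // software-pipelined together.
    for (auto &stmt : *forStmt) {
      auto *opStmt = dyn_cast<OperationStmt>(&stmt);
      if (!opStmt || !opStmt->is<DmaStartOp>())
        continue;
      opDelayMap[opStmt] = 0;
      if (auto *slice = createAffineComputationSlice(opStmt)) {
        // A single multi-result affine_apply now exclusively feeds this DMA start.
        opDelayMap[slice] = 0;
      } else {
        // No slice was created; the reachable affine_apply ops stay with the DMA.
        SmallVector<OperationStmt *, 4> affineApplyStmts;
        SmallVector<MLValue *, 4> operands(opStmt->getOperands());
        getReachableAffineApplyOps(operands, affineApplyStmts);
        for (auto *op : affineApplyStmts)
          opDelayMap[op] = 0;
      }
    }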
Diffstat (limited to 'mlir/lib')
-rw-r--r--  mlir/lib/Analysis/AffineAnalysis.cpp         |   6
-rw-r--r--  mlir/lib/Transforms/ComposeAffineMaps.cpp    |  53
-rw-r--r--  mlir/lib/Transforms/PipelineDataTransfer.cpp |  96
-rw-r--r--  mlir/lib/Transforms/Utils.cpp                | 147
4 files changed, 177 insertions(+), 125 deletions(-)
diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index f332836180a..ee1641b575e 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -304,8 +304,8 @@ AffineExpr mlir::simplifyAffineExpr(AffineExpr expr, unsigned numDims,
// TODO(andydavis) Add a method to AffineApplyOp which forward substitutes
// the AffineApplyOp into any user AffineApplyOps.
void mlir::getReachableAffineApplyOps(
- const SmallVector<MLValue *, 4> &operands,
- SmallVector<OperationStmt *, 4> *affineApplyOps) {
+ ArrayRef<MLValue *> operands,
+ SmallVectorImpl<OperationStmt *> &affineApplyOps) {
struct State {
// The ssa value for this node in the DFS traversal.
MLValue *value;
@@ -329,7 +329,7 @@ void mlir::getReachableAffineApplyOps(
if (auto affineApplyOp = opStmt->getAs<AffineApplyOp>()) {
if (state.operandIndex == 0) {
// Pre-Visit: Add 'opStmt' to reachable sequence.
- affineApplyOps->push_back(opStmt);
+ affineApplyOps.push_back(opStmt);
}
if (state.operandIndex < opStmt->getNumOperands()) {
// Visit: Add next 'affineApplyOp' operand to worklist.
diff --git a/mlir/lib/Transforms/ComposeAffineMaps.cpp b/mlir/lib/Transforms/ComposeAffineMaps.cpp
index 0aa593202e1..9e44dab1bff 100644
--- a/mlir/lib/Transforms/ComposeAffineMaps.cpp
+++ b/mlir/lib/Transforms/ComposeAffineMaps.cpp
@@ -66,6 +66,7 @@
#include "mlir/StandardOps/StandardOps.h"
#include "mlir/Transforms/Pass.h"
#include "mlir/Transforms/Passes.h"
+#include "mlir/Transforms/Utils.h"
#include "llvm/Support/CommandLine.h"
using namespace mlir;
@@ -90,46 +91,6 @@ MLFunctionPass *mlir::createComposeAffineMapsPass() {
return new ComposeAffineMaps();
}
-// Creates and inserts into 'builder' a new AffineApplyOp with the number of
-// results equal to the rank of 'memrefType'. The AffineApplyOp is composed
-// with all other AffineApplyOps reachable from input paramter 'operands'.
-// The final results of the composed AffineApplyOp are returned in output
-// paramter 'results'.
-static void createComposedAffineApplyOp(
- MLFuncBuilder *builder, Location *loc, MemRefType *memrefType,
- const SmallVector<MLValue *, 4> &indices,
- const SmallVector<OperationStmt *, 4> &affineApplyOps,
- SmallVector<SSAValue *, 4> *results) {
- // Get rank of memref type.
- unsigned rank = memrefType->getRank();
- assert(indices.size() == rank);
- // Create identity map with same number of dimensions as 'memrefType'.
- auto map = builder->getMultiDimIdentityMap(rank);
- // Initialize AffineValueMap with identity map.
- AffineValueMap valueMap(map, indices);
-
- for (auto *opStmt : affineApplyOps) {
- assert(opStmt->is<AffineApplyOp>());
- auto affineApplyOp = opStmt->getAs<AffineApplyOp>();
- // Forward substitute 'affineApplyOp' into 'valueMap'.
- valueMap.forwardSubstitute(*affineApplyOp);
- }
- // Compose affine maps from all ancestor AffineApplyOps.
- // Create new AffineApplyOp from 'valueMap'.
- unsigned numOperands = valueMap.getNumOperands();
- SmallVector<SSAValue *, 4> operands(numOperands);
- for (unsigned i = 0; i < numOperands; ++i) {
- operands[i] = valueMap.getOperand(i);
- }
- // Create new AffineApplyOp based on 'valueMap'.
- auto affineApplyOp =
- builder->create<AffineApplyOp>(loc, valueMap.getAffineMap(), operands);
- results->resize(rank);
- for (unsigned i = 0; i < rank; ++i) {
- (*results)[i] = affineApplyOp->getResult(i);
- }
-}
-
PassResult ComposeAffineMaps::runOnMLFunction(MLFunction *f) {
// Gather all loads, stores and affine apply ops.
struct OpGatherer : public StmtWalker<OpGatherer> {
@@ -170,14 +131,14 @@ PassResult ComposeAffineMaps::runOnMLFunction(MLFunction *f) {
// Gather sequnce of AffineApplyOps reachable from 'indices'.
SmallVector<OperationStmt *, 4> affineApplyOps;
- getReachableAffineApplyOps(indices, &affineApplyOps);
+ getReachableAffineApplyOps(indices, affineApplyOps);
// Skip transforming 'loadOp' if there are no affine maps to compose.
if (affineApplyOps.size() <= 1)
continue;
SmallVector<SSAValue *, 4> results;
- createComposedAffineApplyOp(&builder, opStmt->getLoc(), memrefType, indices,
- affineApplyOps, &results);
+ createComposedAffineApplyOp(&builder, opStmt->getLoc(), indices,
+ affineApplyOps, results);
// Create new LoadOp with new affine apply op.
auto *newLoadResult =
builder.create<LoadOp>(opStmt->getLoc(), loadOp->getMemRef(), results)
@@ -203,14 +164,14 @@ PassResult ComposeAffineMaps::runOnMLFunction(MLFunction *f) {
}
// Gather sequnce of AffineApplyOps reachable from 'indices'.
SmallVector<OperationStmt *, 4> affineApplyOps;
- getReachableAffineApplyOps(indices, &affineApplyOps);
+ getReachableAffineApplyOps(indices, affineApplyOps);
// Skip transforming 'storeOp' if there are no affine maps to compose.
if (affineApplyOps.size() <= 1)
continue;
SmallVector<SSAValue *, 4> results;
- createComposedAffineApplyOp(&builder, opStmt->getLoc(), memrefType, indices,
- affineApplyOps, &results);
+ createComposedAffineApplyOp(&builder, opStmt->getLoc(), indices,
+ affineApplyOps, results);
// Create new StoreOp with new affine apply op.
builder.create<StoreOp>(opStmt->getLoc(), storeOp->getValueToStore(),
storeOp->getMemRef(), results);
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index dd8b9a7615c..bb60d8e9d78 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -21,7 +21,7 @@
#include "mlir/Transforms/Passes.h"
-#include "mlir/IR/AffineExpr.h"
+#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/StandardOps/StandardOps.h"
@@ -94,8 +94,7 @@ static bool doubleBuffer(MLValue *oldMemRef, ForStmt *forStmt) {
auto *newMemRefType = doubleShape(cast<MemRefType>(oldMemRef->getType()));
// Create and place the alloc at the top level.
- auto *func = forStmt->getFunction();
- MLFuncBuilder topBuilder(func, func->begin());
+ MLFuncBuilder topBuilder(forStmt->getFunction());
auto *newMemRef = cast<MLValue>(
topBuilder.create<AllocOp>(forStmt->getLoc(), newMemRefType)
->getResult());
@@ -105,13 +104,9 @@ static bool doubleBuffer(MLValue *oldMemRef, ForStmt *forStmt) {
bInner.getAffineMap(/*dimCount=*/1, /*symbolCount=*/0, {d0 % 2}, {});
auto ivModTwoOp =
bInner.create<AffineApplyOp>(forStmt->getLoc(), modTwoMap, forStmt);
- if (!replaceAllMemRefUsesWith(oldMemRef, newMemRef, ivModTwoOp->getResult(0)))
+ if (!replaceAllMemRefUsesWith(oldMemRef, newMemRef,
+ cast<MLValue>(ivModTwoOp->getResult(0))))
return false;
- // We don't need ivMod2Op any more - this is cloned by
- // replaceAllMemRefUsesWith wherever the memref replacement happens. Once
- // b/117159533 is addressed, we'll eventually only need to pass
- // ivModTwoOp->getResult(0) to replaceAllMemRefUsesWith.
- cast<OperationStmt>(ivModTwoOp->getOperation())->eraseFromBlock();
return true;
}
@@ -169,16 +164,18 @@ PassResult PipelineDataTransfer::runOnMLFunction(MLFunction *f) {
for (auto *dmaStartStmt : dmaStartStmts) {
MLValue *oldMemRef = cast<MLValue>(dmaStartStmt->getOperand(
getHigherMemRefPos(dmaStartStmt->getAs<DmaStartOp>())));
- if (!doubleBuffer(oldMemRef, forStmt))
+ if (!doubleBuffer(oldMemRef, forStmt)) {
return PassResult::Failure;
+ }
}
// Double the buffers for tag memref's.
for (auto *dmaFinishStmt : dmaFinishStmts) {
MLValue *oldTagMemRef = cast<MLValue>(
dmaFinishStmt->getOperand(getTagMemRefPos(*dmaFinishStmt)));
- if (!doubleBuffer(oldTagMemRef, forStmt))
+ if (!doubleBuffer(oldTagMemRef, forStmt)) {
return PassResult::Failure;
+ }
}
// Collect all compute ops.
@@ -186,75 +183,43 @@ PassResult PipelineDataTransfer::runOnMLFunction(MLFunction *f) {
computeOps.reserve(forStmt->getStatements().size());
// Store delay for statement for later lookup for AffineApplyOp's.
DenseMap<const Statement *, unsigned> opDelayMap;
- for (const auto &stmt : *forStmt) {
+ for (auto &stmt : *forStmt) {
auto *opStmt = dyn_cast<OperationStmt>(&stmt);
if (!opStmt) {
// All for and if stmt's are treated as pure compute operations.
- // TODO(bondhugula): check whether such statements do not have any DMAs
- // nested within.
opDelayMap[&stmt] = 1;
} else if (opStmt->is<DmaStartOp>()) {
// DMA starts are not shifted.
- opDelayMap[&stmt] = 0;
+ opDelayMap[opStmt] = 0;
+ // Set shifts for DMA start stmt's affine operand computation slices to 0.
+ if (auto *slice = mlir::createAffineComputationSlice(opStmt)) {
+ opDelayMap[slice] = 0;
+ } else {
+ // If a slice wasn't created, the reachable affine_apply op's from its
+ // operands are the ones that go with it.
+ SmallVector<OperationStmt *, 4> affineApplyStmts;
+ SmallVector<MLValue *, 4> operands(opStmt->getOperands());
+ getReachableAffineApplyOps(operands, affineApplyStmts);
+ for (auto *op : affineApplyStmts) {
+ opDelayMap[op] = 0;
+ }
+ }
} else if (opStmt->is<DmaWaitOp>()) {
// DMA finish op shifted by one.
- opDelayMap[&stmt] = 1;
- } else if (!opStmt->is<AffineApplyOp>()) {
- // Compute op shifted by one.
- opDelayMap[&stmt] = 1;
+ opDelayMap[opStmt] = 1;
+ } else {
+ // Everything else is a compute op, so it is shifted by one (ops supplying 'affine'
+ // operands to DMA starts have already been assigned the right shifts above).
+ opDelayMap[opStmt] = 1;
computeOps.push_back(&stmt);
}
- // Shifts for affine apply op's determined later.
- }
-
- // Get the ancestor of a 'stmt' that lies in forStmt's block.
- auto getAncestorInForBlock =
- [&](const Statement *stmt, const StmtBlock &block) -> const Statement * {
- // Traverse up the statement hierarchy starting from the owner of operand to
- // find the ancestor statement that resides in the block of 'forStmt'.
- while (stmt != nullptr && stmt->getBlock() != &block) {
- stmt = stmt->getParentStmt();
- }
- return stmt;
- };
-
- // Determine delays for affine apply op's: look up delay from its consumer op.
- // This code will be thrown away once we have a way to obtain indices through
- // a composed affine_apply op. See TODO(b/117159533). Such a composed
- // affine_apply will be used exclusively by a given memref deferencing op.
- for (const auto &stmt : *forStmt) {
- auto *opStmt = dyn_cast<OperationStmt>(&stmt);
- // Skip statements that aren't affine apply ops.
- if (!opStmt || !opStmt->is<AffineApplyOp>())
- continue;
- // Traverse uses of each result of the affine apply op.
- for (auto *res : opStmt->getResults()) {
- for (auto &use : res->getUses()) {
- auto *ancestorInForBlock =
- getAncestorInForBlock(use.getOwner(), *forStmt);
- assert(ancestorInForBlock &&
- "traversing parent should reach forStmt block");
- auto *opCheck = dyn_cast<OperationStmt>(ancestorInForBlock);
- if (!opCheck || opCheck->is<AffineApplyOp>())
- continue;
- assert(opDelayMap.find(ancestorInForBlock) != opDelayMap.end());
- if (opDelayMap.find(&stmt) != opDelayMap.end()) {
- // This is where we enforce all uses of this affine_apply to have
- // the same shifts - so that we know what shift to use for the
- // affine_apply to preserve semantics.
- assert(opDelayMap[&stmt] == opDelayMap[ancestorInForBlock]);
- } else {
- // Obtain delay from its consumer.
- opDelayMap[&stmt] = opDelayMap[ancestorInForBlock];
- }
- }
- }
}
// Get delays stored in map.
std::vector<uint64_t> delays(forStmt->getStatements().size());
unsigned s = 0;
for (const auto &stmt : *forStmt) {
+ assert(opDelayMap.find(&stmt) != opDelayMap.end());
delays[s++] = opDelayMap[&stmt];
}
@@ -263,8 +228,9 @@ PassResult PipelineDataTransfer::runOnMLFunction(MLFunction *f) {
return PassResult::Failure;
}
- if (stmtBodySkew(forStmt, delays))
+ if (stmtBodySkew(forStmt, delays)) {
return PassResult::Failure;
+ }
return PassResult::Success;
}
diff --git a/mlir/lib/Transforms/Utils.cpp b/mlir/lib/Transforms/Utils.cpp
index 2e8f0d32736..62ef3ba225a 100644
--- a/mlir/lib/Transforms/Utils.cpp
+++ b/mlir/lib/Transforms/Utils.cpp
@@ -22,7 +22,8 @@
#include "mlir/Transforms/Utils.h"
-#include "mlir/IR/AffineMap.h"
+#include "mlir/Analysis/AffineAnalysis.h"
+#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/StandardOps/StandardOps.h"
@@ -48,7 +49,7 @@ static bool isMemRefDereferencingOp(const Operation &op) {
// TODO(mlir-team): extend this for SSAValue / CFGFunctions. Can also be easily
// extended to add additional indices at any position.
bool mlir::replaceAllMemRefUsesWith(MLValue *oldMemRef, MLValue *newMemRef,
- ArrayRef<SSAValue *> extraIndices,
+ ArrayRef<MLValue *> extraIndices,
AffineMap indexRemap) {
unsigned newMemRefRank = cast<MemRefType>(newMemRef->getType())->getRank();
(void)newMemRefRank; // unused in opt mode
@@ -101,24 +102,16 @@ bool mlir::replaceAllMemRefUsesWith(MLValue *oldMemRef, MLValue *newMemRef,
operands.push_back(newMemRef);
MLFuncBuilder builder(opStmt);
- // Normally, we could just use extraIndices as operands, but we will
- // clone it so that each op gets its own "private" index. See b/117159533.
for (auto *extraIndex : extraIndices) {
- OperationStmt::OperandMapTy operandMap;
// TODO(mlir-team): An operation/SSA value should provide a method to
// return the position of an SSA result in its defining
// operation.
assert(extraIndex->getDefiningStmt()->getNumResults() == 1 &&
"single result op's expected to generate these indices");
- // TODO: actually check if this is a result of an affine_apply op.
assert((cast<MLValue>(extraIndex)->isValidDim() ||
cast<MLValue>(extraIndex)->isValidSymbol()) &&
"invalid memory op index");
- auto *clonedExtraIndex =
- cast<OperationStmt>(
- builder.clone(*extraIndex->getDefiningStmt(), operandMap))
- ->getResult(0);
- operands.push_back(cast<MLValue>(clonedExtraIndex));
+ operands.push_back(cast<MLValue>(extraIndex));
}
// Construct new indices. The indices of a memref come right after it, i.e.,
@@ -163,3 +156,135 @@ bool mlir::replaceAllMemRefUsesWith(MLValue *oldMemRef, MLValue *newMemRef,
}
return true;
}
+
+// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
+// its results equal to the number of 'operands', as a composition
+// of all other AffineApplyOps reachable from input parameter 'operands'. If the
+// operands draw results from multiple affine apply ops, this also collapses
+// them into a single affine apply op. The final results of the
+// composed AffineApplyOp are returned in output parameter 'results'.
+OperationStmt *
+mlir::createComposedAffineApplyOp(MLFuncBuilder *builder, Location *loc,
+ ArrayRef<MLValue *> operands,
+ ArrayRef<OperationStmt *> affineApplyOps,
+ SmallVectorImpl<SSAValue *> &results) {
+ // Create identity map with same number of dimensions as number of operands.
+ auto map = builder->getMultiDimIdentityMap(operands.size());
+ // Initialize AffineValueMap with identity map.
+ AffineValueMap valueMap(map, operands);
+
+ for (auto *opStmt : affineApplyOps) {
+ assert(opStmt->is<AffineApplyOp>());
+ auto affineApplyOp = opStmt->getAs<AffineApplyOp>();
+ // Forward substitute 'affineApplyOp' into 'valueMap'.
+ valueMap.forwardSubstitute(*affineApplyOp);
+ }
+ // Compose affine maps from all ancestor AffineApplyOps.
+ // Create new AffineApplyOp from 'valueMap'.
+ unsigned numOperands = valueMap.getNumOperands();
+ SmallVector<SSAValue *, 4> outOperands(numOperands);
+ for (unsigned i = 0; i < numOperands; ++i) {
+ outOperands[i] = valueMap.getOperand(i);
+ }
+ // Create new AffineApplyOp based on 'valueMap'.
+ auto affineApplyOp =
+ builder->create<AffineApplyOp>(loc, valueMap.getAffineMap(), outOperands);
+ results.resize(operands.size());
+ for (unsigned i = 0, e = operands.size(); i < e; ++i) {
+ results[i] = affineApplyOp->getResult(i);
+ }
+ return cast<OperationStmt>(affineApplyOp->getOperation());
+}
+
+/// Given an operation statement, inserts a new single affine apply operation
+/// that is exclusively used by this operation statement, and that provides all
+/// operands that are the results of an affine_apply as a function of loop
+/// iterators and program parameters.
+///
+/// Before
+///
+/// for %i = 0 to #map(%N)
+/// %idx = affine_apply (d0) -> (d0 mod 2) (%i)
+/// "send"(%idx, %A, ...)
+/// "compute"(%idx)
+///
+/// After
+///
+/// for %i = 0 to #map(%N)
+/// %idx = affine_apply (d0) -> (d0 mod 2) (%i)
+/// "send"(%idx, %A, ...)
+/// %idx_ = affine_apply (d0) -> (d0 mod 2) (%i)
+/// "compute"(%idx_)
+///
+/// This allows applying different transformations on send and compute (e.g.,
+/// different shifts/delays).
+///
+/// Returns nullptr if none of the operands were the result of an affine_apply
+/// and thus there was no affine computation slice to create. Returns the newly
+/// created affine_apply operation statement otherwise.
+///
+OperationStmt *mlir::createAffineComputationSlice(OperationStmt *opStmt) {
+ // Collect all operands that are results of affine apply ops.
+ SmallVector<MLValue *, 4> subOperands;
+ subOperands.reserve(opStmt->getNumOperands());
+ for (auto *operand : opStmt->getOperands()) {
+ auto *defStmt = operand->getDefiningStmt();
+ if (defStmt && defStmt->is<AffineApplyOp>()) {
+ subOperands.push_back(operand);
+ }
+ }
+
+ // Gather sequence of AffineApplyOps reachable from 'subOperands'.
+ SmallVector<OperationStmt *, 4> affineApplyOps;
+ getReachableAffineApplyOps(subOperands, affineApplyOps);
+ // Skip transforming if there are no affine maps to compose.
+ if (affineApplyOps.empty())
+ return nullptr;
+
+ // Check if all uses of the affine apply op's lie in this op stmt
+ // itself, in which case there would be nothing to do.
+ bool localized = true;
+ for (auto *op : affineApplyOps) {
+ for (auto *result : op->getResults()) {
+ for (auto &use : result->getUses()) {
+ if (use.getOwner() != opStmt) {
+ localized = false;
+ break;
+ }
+ }
+ }
+ }
+ if (localized)
+ return nullptr;
+
+ MLFuncBuilder builder(opStmt);
+ SmallVector<SSAValue *, 4> results;
+ auto *affineApplyStmt = createComposedAffineApplyOp(
+ &builder, opStmt->getLoc(), subOperands, affineApplyOps, results);
+ assert(results.size() == subOperands.size() &&
+ "number of results should be the same as the number of subOperands");
+
+ // Construct the new operands that include the results from the composed
+ // affine apply op above instead of existing ones (subOperands). So, they
+ // differ from opStmt's operands only for those operands in 'subOperands', for
+ // which they will be replaced by the corresponding one from 'results'.
+ SmallVector<MLValue *, 4> newOperands(opStmt->getOperands());
+ for (unsigned i = 0, e = newOperands.size(); i < e; i++) {
+ // Replace the subOperands from among the new operands.
+ unsigned j, f;
+ for (j = 0, f = subOperands.size(); j < f; j++) {
+ if (newOperands[i] == subOperands[j])
+ break;
+ }
+ if (j < subOperands.size()) {
+ newOperands[i] = cast<MLValue>(results[j]);
+ }
+ }
+
+ for (unsigned idx = 0; idx < newOperands.size(); idx++) {
+ opStmt->setOperand(idx, newOperands[idx]);
+ }
+
+ return affineApplyStmt;
+}
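
For reference, a minimal usage sketch (not part of the commit) of the generalized createComposedAffineApplyOp, along the lines of the ComposeAffineMaps.cpp change above. Here 'indices' is assumed to already hold the load's index values as MLValues; everything else follows the signatures introduced in this diff:

    // Collapse all affine_apply ops reachable from a load's indices into a
    // single multi-result affine_apply, then rebuild the load on its results.
    MLFuncBuilder builder(opStmt);
    SmallVector<OperationStmt *, 4> affineApplyOps;
    getReachableAffineApplyOps(indices, affineApplyOps);
    if (affineApplyOps.size() > 1) {
      SmallVector<SSAValue *, 4> results;
      createComposedAffineApplyOp(&builder, opStmt->getLoc(), indices,
                                  affineApplyOps, results);
      // 'results' now holds one value per index, all produced by one composed
      // affine_apply op; create the new LoadOp that uses them.
      builder.create<LoadOp>(opStmt->getLoc(), loadOp->getMemRef(), results);
    }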