Diffstat (limited to 'mlir/lib/Transforms/Vectorize.cpp')
 mlir/lib/Transforms/Vectorize.cpp | 63 +++++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 35 insertions(+), 28 deletions(-)
diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp
index ac551d7c20c..7f26161e520 100644
--- a/mlir/lib/Transforms/Vectorize.cpp
+++ b/mlir/lib/Transforms/Vectorize.cpp
@@ -20,6 +20,7 @@
//
//===----------------------------------------------------------------------===//
+#include "mlir/AffineOps/AffineOps.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/NestedMatcher.h"
#include "mlir/Analysis/VectorAnalysis.h"
@@ -252,9 +253,9 @@ using namespace mlir;
/// ==========
/// The algorithm proceeds in a few steps:
/// 1. defining super-vectorization patterns and matching them on the tree of
-/// ForInst. A super-vectorization pattern is defined as a recursive data
-/// structures that matches and captures nested, imperfectly-nested loops
-/// that have a. comformable loop annotations attached (e.g. parallel,
+/// AffineForOp. A super-vectorization pattern is defined as a recursive
+/// data structure that matches and captures nested, imperfectly-nested
+/// loops that have a. conformable loop annotations attached (e.g. parallel,
/// reduction, vectorizable, ...) as well as b. all contiguous load/store
/// operations along a specified minor dimension (not necessarily the
/// fastest varying) ;
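As a rough illustration of step 1 (a sketch only, not part of this patch): such a pattern can be composed from the nested matcher combinators this file already relies on. The exact signatures of matcher::For and of NestedPattern::match over a Function are assumed here; isVectorizableLoopPtrFactory is the helper updated further down in this diff.

    // Sketch: a 2-D super-vectorization pattern. The outer loop must be
    // vectorizable along the second fastest-varying memref dimension, the
    // inner one along the fastest-varying dimension. Matching against a
    // Function yields NestedMatch trees rooted at candidate AffineForOps.
    auto pattern2d = matcher::For(
        isVectorizableLoopPtrFactory(/*fastestVaryingMemRefDimension=*/1),
        matcher::For(
            isVectorizableLoopPtrFactory(/*fastestVaryingMemRefDimension=*/0)));
    SmallVector<NestedMatch, 8> matches;
    pattern2d.match(f, &matches); // `f` is the Function being vectorized.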
@@ -279,11 +280,11 @@ using namespace mlir;
/// it by its vector form. Otherwise, if the scalar value is a constant,
/// it is vectorized into a splat. In all other cases, vectorization for
/// the pattern currently fails.
-/// e. if everything under the root ForInst in the current pattern vectorizes
-/// properly, we commit that loop to the IR. Otherwise we discard it and
-/// restore a previously cloned version of the loop. Thanks to the
-/// recursive scoping nature of matchers and captured patterns, this is
-/// transparently achieved by a simple RAII implementation.
+/// e. if everything under the root AffineForOp in the current pattern
+/// vectorizes properly, we commit that loop to the IR. Otherwise we
+/// discard it and restore a previously cloned version of the loop. Thanks
+/// to the recursive scoping nature of matchers and captured patterns,
+/// this is transparently achieved by a simple RAII implementation.
/// f. vectorization is applied on the next pattern in the list. Because
/// pattern interference avoidance is not yet implemented and we do not
/// support further vectorizing an already vectorized load, we need to
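The clone-and-restore idiom described in step e above appears concretely in the vectorizeRootMatch hunk further down; it is condensed here for orientation (the success path, which presumably erases the clone, lives in unchanged code that this patch does not touch):

    auto *loopInst = loop->getInstruction();
    FuncBuilder builder(loopInst); // insertion point right at the root loop
    auto clonedLoop =
        cast<OperationInst>(builder.clone(*loopInst))->cast<AffineForOp>();
    auto fail = doVectorize(m, &state);
    // RAII: on failure, redirect uses of the induction variable back to the
    // pristine clone so the original computation is preserved.
    ScopeGuard sg([&fail, &loop, &clonedLoop]() {
      if (fail) {
        loop->getInductionVar()->replaceAllUsesWith(
            clonedLoop->getInductionVar());
      }
    });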
@@ -667,12 +668,13 @@ namespace {
struct VectorizationStrategy {
SmallVector<int64_t, 8> vectorSizes;
- DenseMap<ForInst *, unsigned> loopToVectorDim;
+ DenseMap<Instruction *, unsigned> loopToVectorDim;
};
} // end anonymous namespace
-static void vectorizeLoopIfProfitable(ForInst *loop, unsigned depthInPattern,
+static void vectorizeLoopIfProfitable(Instruction *loop,
+ unsigned depthInPattern,
unsigned patternDepth,
VectorizationStrategy *strategy) {
assert(patternDepth > depthInPattern &&
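A minimal usage sketch for the updated map (assumed code, not part of this patch; it presumes OpPointer&lt;AffineForOp&gt; is the handle type returned by the casts used throughout this diff):

    // With loops now ordinary AffineForOp operations rather than first-class
    // ForInsts, the strategy is keyed on the loop's underlying Instruction*.
    static void recordLoopVectorDim(OpPointer<AffineForOp> loop, unsigned dim,
                                    VectorizationStrategy *strategy) {
      strategy->loopToVectorDim[loop->getInstruction()] = dim;
    }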
@@ -704,13 +706,13 @@ static bool analyzeProfitability(ArrayRef<NestedMatch> matches,
unsigned depthInPattern, unsigned patternDepth,
VectorizationStrategy *strategy) {
for (auto m : matches) {
- auto *loop = cast<ForInst>(m.getMatchedInstruction());
bool fail = analyzeProfitability(m.getMatchedChildren(), depthInPattern + 1,
patternDepth, strategy);
if (fail) {
return fail;
}
- vectorizeLoopIfProfitable(loop, depthInPattern, patternDepth, strategy);
+ vectorizeLoopIfProfitable(m.getMatchedInstruction(), depthInPattern,
+ patternDepth, strategy);
}
return false;
}
@@ -855,8 +857,8 @@ static bool vectorizeRootOrTerminal(Value *iv, LoadOrStoreOpPointer memoryOp,
/// Coarsens the loop's bounds and transforms all remaining load and store
/// operations into the appropriate vector_transfer.
-static bool vectorizeForInst(ForInst *loop, int64_t step,
- VectorizationState *state) {
+static bool vectorizeAffineForOp(AffineForOp *loop, int64_t step,
+ VectorizationState *state) {
using namespace functional;
loop->setStep(step);
@@ -873,7 +875,7 @@ static bool vectorizeForInst(ForInst *loop, int64_t step,
};
auto loadAndStores = matcher::Op(notVectorizedThisPattern);
SmallVector<NestedMatch, 8> loadAndStoresMatches;
- loadAndStores.match(loop, &loadAndStoresMatches);
+ loadAndStores.match(loop->getInstruction(), &loadAndStoresMatches);
for (auto ls : loadAndStoresMatches) {
auto *opInst = cast<OperationInst>(ls.getMatchedInstruction());
auto load = opInst->dyn_cast<LoadOp>();
@@ -898,7 +900,7 @@ static bool vectorizeForInst(ForInst *loop, int64_t step,
static FilterFunctionType
isVectorizableLoopPtrFactory(unsigned fastestVaryingMemRefDimension) {
return [fastestVaryingMemRefDimension](const Instruction &forInst) {
- const auto &loop = cast<ForInst>(forInst);
+ auto loop = cast<OperationInst>(forInst).cast<AffineForOp>();
return isVectorizableLoopAlongFastestVaryingMemRefDim(
loop, fastestVaryingMemRefDimension);
};
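A hedged aside, not part of this patch: the new filter asserts through cast if the matched instruction is not an affine.for operation. A more defensive variant could decline such instructions instead, assuming the usual dyn_cast member is available alongside the cast used above:

    return [fastestVaryingMemRefDimension](const Instruction &forInst) {
      // Decline anything that is not an AffineForOp instead of asserting.
      auto loop = cast<OperationInst>(forInst).dyn_cast<AffineForOp>();
      if (!loop)
        return false;
      return isVectorizableLoopAlongFastestVaryingMemRefDim(
          loop, fastestVaryingMemRefDimension);
    };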
@@ -912,7 +914,8 @@ static bool vectorizeNonRoot(ArrayRef<NestedMatch> matches,
/// if all vectorizations in `childrenMatches` have already succeeded
/// recursively in DFS post-order.
static bool doVectorize(NestedMatch oneMatch, VectorizationState *state) {
- ForInst *loop = cast<ForInst>(oneMatch.getMatchedInstruction());
+ auto *loopInst = oneMatch.getMatchedInstruction();
+ auto loop = cast<OperationInst>(loopInst)->cast<AffineForOp>();
auto childrenMatches = oneMatch.getMatchedChildren();
// 1. DFS postorder recursion, if any of my children fails, I fail too.
@@ -924,7 +927,7 @@ static bool doVectorize(NestedMatch oneMatch, VectorizationState *state) {
// 2. This loop may have been omitted from vectorization for various reasons
// (e.g. due to the performance model or pattern depth > vector size).
- auto it = state->strategy->loopToVectorDim.find(loop);
+ auto it = state->strategy->loopToVectorDim.find(loopInst);
if (it == state->strategy->loopToVectorDim.end()) {
return false;
}
@@ -939,10 +942,10 @@ static bool doVectorize(NestedMatch oneMatch, VectorizationState *state) {
// exploratory tradeoffs (see top of the file). Apply coarsening, i.e.:
// | ub -> ub
// | step -> step * vectorSize
- LLVM_DEBUG(dbgs() << "\n[early-vect] vectorizeForInst by " << vectorSize
+ LLVM_DEBUG(dbgs() << "\n[early-vect] vectorizeForOp by " << vectorSize
<< " : ");
- LLVM_DEBUG(loop->print(dbgs()));
- return vectorizeForInst(loop, loop->getStep() * vectorSize, state);
+ LLVM_DEBUG(loopInst->print(dbgs()));
+ return vectorizeAffineForOp(loop, loop->getStep() * vectorSize, state);
}
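A quick worked example of the coarsening arithmetic above, with hypothetical numbers (illustration only):

    // A loop over [0, 128) with step 1, vectorized by 8: bounds are unchanged,
    // the step becomes 8, and the (now vector) body runs 16 times instead of
    // 128.
    int64_t lb = 0, ub = 128, step = 1, vectorSize = 8;
    int64_t coarsenedStep = step * vectorSize;        // 8
    int64_t newTripCount = (ub - lb) / coarsenedStep; // 16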
/// Non-root pattern iterates over the matches at this level, calls doVectorize
@@ -1186,7 +1189,8 @@ static bool vectorizeOperations(VectorizationState *state) {
/// Each root may succeed independently but will otherwise clean after itself if
/// anything below it fails.
static bool vectorizeRootMatch(NestedMatch m, VectorizationStrategy *strategy) {
- auto *loop = cast<ForInst>(m.getMatchedInstruction());
+ auto loop =
+ cast<OperationInst>(m.getMatchedInstruction())->cast<AffineForOp>();
VectorizationState state;
state.strategy = strategy;
@@ -1197,17 +1201,20 @@ static bool vectorizeRootMatch(NestedMatch m, VectorizationStrategy *strategy) {
// vectorizable. If a pattern is not vectorizable anymore, we just skip it.
// TODO(ntv): implement a non-greedy profitability analysis that keeps only
// non-intersecting patterns.
- if (!isVectorizableLoop(*loop)) {
+ if (!isVectorizableLoop(loop)) {
LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ loop is not vectorizable");
return true;
}
- FuncBuilder builder(loop); // builder to insert in place of loop
- ForInst *clonedLoop = cast<ForInst>(builder.clone(*loop));
+ auto *loopInst = loop->getInstruction();
+ FuncBuilder builder(loopInst);
+ auto clonedLoop =
+ cast<OperationInst>(builder.clone(*loopInst))->cast<AffineForOp>();
+
auto fail = doVectorize(m, &state);
/// Sets up error handling for this root loop. This is how the root match
/// maintains a clone for handling failure and restores the proper state via
/// RAII.
- ScopeGuard sg2([&fail, loop, clonedLoop]() {
+ ScopeGuard sg2([&fail, &loop, &clonedLoop]() {
if (fail) {
loop->getInductionVar()->replaceAllUsesWith(
clonedLoop->getInductionVar());
@@ -1291,8 +1298,8 @@ PassResult Vectorize::runOnFunction(Function *f) {
if (fail) {
continue;
}
- auto *loop = cast<ForInst>(m.getMatchedInstruction());
- vectorizeLoopIfProfitable(loop, 0, patternDepth, &strategy);
+ vectorizeLoopIfProfitable(m.getMatchedInstruction(), 0, patternDepth,
+ &strategy);
// TODO(ntv): if pattern does not apply, report it; alter the
// cost/benefit.
fail = vectorizeRootMatch(m, &strategy);