Diffstat (limited to 'mlir/lib')
-rw-r--r--  mlir/lib/AffineOps/AffineOps.cpp              |  2
-rw-r--r--  mlir/lib/Analysis/AffineAnalysis.cpp          |  8
-rw-r--r--  mlir/lib/Analysis/Utils.cpp                   | 12
-rw-r--r--  mlir/lib/IR/Block.cpp                         |  2
-rw-r--r--  mlir/lib/Transforms/DmaGeneration.cpp         | 19
-rw-r--r--  mlir/lib/Transforms/LoopFusion.cpp            |  3
-rw-r--r--  mlir/lib/Transforms/LoopTiling.cpp            |  3
-rw-r--r--  mlir/lib/Transforms/LoopUnroll.cpp            |  2
-rw-r--r--  mlir/lib/Transforms/LoopUnrollAndJam.cpp      |  2
-rw-r--r--  mlir/lib/Transforms/LowerAffine.cpp           |  8
-rw-r--r--  mlir/lib/Transforms/LowerVectorTransfers.cpp  | 10
-rw-r--r--  mlir/lib/Transforms/MaterializeVectors.cpp    | 24
-rw-r--r--  mlir/lib/Transforms/MemRefDataFlowOpt.cpp     |  8
-rw-r--r--  mlir/lib/Transforms/PipelineDataTransfer.cpp  | 18
-rw-r--r--  mlir/lib/Transforms/Utils/LoopUtils.cpp       | 14
-rw-r--r--  mlir/lib/Transforms/Utils/Utils.cpp           |  4
-rw-r--r--  mlir/lib/Transforms/Vectorize.cpp             | 50
17 files changed, 97 insertions, 92 deletions
diff --git a/mlir/lib/AffineOps/AffineOps.cpp b/mlir/lib/AffineOps/AffineOps.cpp
index 249b09f41cd..be5a2f14628 100644
--- a/mlir/lib/AffineOps/AffineOps.cpp
+++ b/mlir/lib/AffineOps/AffineOps.cpp
@@ -716,7 +716,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) {
}
void AffineForOp::print(OpAsmPrinter *p) const {
- *p << "for ";
+ *p << "affine.for ";
p->printOperand(getBody()->getArgument(0));
*p << " = ";
printBound(getLowerBound(), "max", p);
diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index 9d2ea691bdd..3a086ba512d 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -756,8 +756,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
// For example, given the following MLIR code with "source" and
// "destination" accesses to the same memref labeled, and symbols %M, %N, %K:
//
-// for %i0 = 0 to 100 {
-// for %i1 = 0 to 50 {
+// affine.for %i0 = 0 to 100 {
+// affine.for %i1 = 0 to 50 {
// %a0 = affine.apply
// (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
// // Source memref access.
@@ -765,8 +765,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
// }
// }
//
-// for %i2 = 0 to 100 {
-// for %i3 = 0 to 50 {
+// affine.for %i2 = 0 to 100 {
+// affine.for %i3 = 0 to 50 {
// %a1 = affine.apply
// (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
// // Destination memref access.
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index ae48e644a68..a48f39c2aac 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -36,13 +36,13 @@
using namespace mlir;
/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
void mlir::getLoopIVs(const Instruction &inst,
SmallVectorImpl<OpPointer<AffineForOp>> *loops) {
auto *currInst = inst.getParentInst();
OpPointer<AffineForOp> currAffineForOp;
- // Traverse up the hierarchy collecing all 'for' instruction while skipping
- // over 'if' instructions.
+ // Traverse up the hierarchy collecting all 'affine.for' instructions while
+ // skipping over 'if' instructions.
while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
currInst->isa<AffineIfOp>())) {
if (currAffineForOp)
@@ -111,8 +111,8 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
-// for %i = 0 to 32 {
-// for %ii = %i to (d0) -> (d0 + 8) (%i) {
+// affine.for %i = 0 to 32 {
+// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
// load %A[%ii]
// }
// }
@@ -614,7 +614,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block,
int memorySpace) {
std::vector<std::unique_ptr<MemRefRegion>> regions;
- // Walk this 'for' instruction to gather all memory regions.
+ // Walk this 'affine.for' instruction to gather all memory regions.
bool error = false;
const_cast<Block *>(&block)->walk([&](Instruction *opInst) {
if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) {
diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp
index e0c76e9efad..83e15097942 100644
--- a/mlir/lib/IR/Block.cpp
+++ b/mlir/lib/IR/Block.cpp
@@ -189,7 +189,7 @@ unsigned Block::getNumSuccessors() const {
return terminator->getNumSuccessors();
}
assert(getParent() && "top-level block with no terminator");
- // Blocks inside 'for'/'if' instructions don't have successors.
+ // Blocks inside 'affine.for'/'if' instructions don't have successors.
return 0;
}
diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp
index 855ff37f60f..631ebf939ea 100644
--- a/mlir/lib/Transforms/DmaGeneration.cpp
+++ b/mlir/lib/Transforms/DmaGeneration.cpp
@@ -338,7 +338,8 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
auto fastMemRefType = top.getMemRefType(
fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);
- // Create the fast memory space buffer just before the 'for' instruction.
+ // Create the fast memory space buffer just before the 'affine.for'
+ // instruction.
fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult();
// Record it.
fastBufferMap[memref] = fastMemRef;
@@ -456,7 +457,7 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
// approach is conservative in some cases at the moment, we do a check later
// and report an error with location info.
// TODO(bondhugula): An 'if' instruction is being treated similar to an
- // operation instruction. 'if''s could have 'for's in them; treat them
+ // operation instruction. 'if''s could have 'affine.for's in them; treat them
// separately.
// Get to the first load, store, or for op.
@@ -470,9 +471,9 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
if (auto forOp = it->dyn_cast<AffineForOp>()) {
// We'll assume for now that loops with steps are tiled loops, and so DMAs
// are not performed for that depth, but only further inside.
- // If the memory footprint of the 'for' loop is higher than fast memory
- // capacity (when provided), we recurse to DMA at an inner level until
- // we find a depth at which footprint fits in the capacity. If the
+ // If the memory footprint of the 'affine.for' loop is higher than fast
+ // memory capacity (when provided), we recurse to DMA at an inner level
+ // until we find a depth at which footprint fits in the capacity. If the
// footprint can't be calculated, we assume for now it fits.
// Returns true if the footprint is known to exceed capacity.
@@ -489,13 +490,13 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);
// Recurse onto the body of this loop.
runOnBlock(forOp->getBody(), consumedCapacityBytes);
- // The next region starts right after the 'for' instruction.
+ // The next region starts right after the 'affine.for' instruction.
curBegin = std::next(it);
} else {
// We have enough capacity, i.e., DMAs will be computed for the portion
- // of the block until 'it', and for the 'for' loop. For the latter, they
- // are placed just before this loop (for incoming DMAs) and right after
- // (for outgoing ones).
+ // of the block until 'it', and for the 'affine.for' loop. For the
+ // latter, they are placed just before this loop (for incoming DMAs) and
+ // right after (for outgoing ones).
consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);
// Inner loop DMAs have their own scope - we don't thus update consumed
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 8d5f51059bf..9e96b0800b3 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -510,7 +510,8 @@ bool MemRefDependenceGraph::init(Function *f) {
// all loads and store accesses it contains.
LoopNestStateCollector collector;
collector.collect(&inst);
- // Return false if a non 'for' region was found (not currently supported).
+ // Return false if a non 'affine.for' region was found (not currently
+ // supported).
if (collector.hasNonForRegion)
return false;
Node node(nextNodeId++, &inst);
diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp
index 368a1dac1df..f00c2e767e6 100644
--- a/mlir/lib/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Transforms/LoopTiling.cpp
@@ -231,7 +231,8 @@ UtilResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
static void
getTileableBands(Function *f,
std::vector<SmallVector<OpPointer<AffineForOp>, 6>> *bands) {
- // Get maximal perfect nest of 'for' insts starting from root (inclusive).
+ // Get maximal perfect nest of 'affine.for' insts starting from root
+ // (inclusive).
auto getMaximalPerfectLoopNest = [&](OpPointer<AffineForOp> root) {
SmallVector<OpPointer<AffineForOp>, 6> band;
OpPointer<AffineForOp> currInst = root;
diff --git a/mlir/lib/Transforms/LoopUnroll.cpp b/mlir/lib/Transforms/LoopUnroll.cpp
index 3a7cfb85e08..025a86891df 100644
--- a/mlir/lib/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Transforms/LoopUnroll.cpp
@@ -164,7 +164,7 @@ PassResult LoopUnroll::runOnFunction(Function *f) {
return success();
}
-/// Unrolls a 'for' inst. Returns true if the loop was unrolled, false
+/// Unrolls an 'affine.for' inst. Returns true if the loop was unrolled, false
/// otherwise. The default unroll factor is 4.
bool LoopUnroll::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
// Use the function callback if one was provided.
diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
index b2aed7d9d7f..2f0249824dd 100644
--- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
@@ -105,7 +105,7 @@ PassResult LoopUnrollAndJam::runOnFunction(Function *f) {
return success();
}
-/// Unroll and jam a 'for' inst. Default unroll jam factor is
+/// Unroll and jam an 'affine.for' inst. Default unroll jam factor is
/// kDefaultUnrollJamFactor. Return false if nothing was done.
bool LoopUnrollAndJam::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
// Unroll and jam by the factor that was passed if any.
diff --git a/mlir/lib/Transforms/LowerAffine.cpp b/mlir/lib/Transforms/LowerAffine.cpp
index 0d8eb8a4761..ef45891c26f 100644
--- a/mlir/lib/Transforms/LowerAffine.cpp
+++ b/mlir/lib/Transforms/LowerAffine.cpp
@@ -283,7 +283,8 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
return value;
}
-// Convert a "for" loop to a flow of blocks. Return `false` on success.
+// Convert an "affine.for" loop to a flow of blocks. Return `false` on
+// success.
//
// Create an SESE region for the loop (including its body) and append it to the
// end of the current region. The loop region consists of the initialization
@@ -330,8 +331,9 @@ bool LowerAffinePass::lowerAffineFor(OpPointer<AffineForOp> forOp) {
auto loc = forOp->getLoc();
auto *forInst = forOp->getInstruction();
- // Start by splitting the block containing the 'for' into two parts. The part
- // before will get the init code, the part after will be the end point.
+ // Start by splitting the block containing the 'affine.for' into two parts.
+ // The part before will get the init code, the part after will be the end
+ // point.
auto *initBlock = forInst->getBlock();
auto *endBlock = initBlock->splitBlock(forInst);
diff --git a/mlir/lib/Transforms/LowerVectorTransfers.cpp b/mlir/lib/Transforms/LowerVectorTransfers.cpp
index 63fb45db9c5..e63d3c8111c 100644
--- a/mlir/lib/Transforms/LowerVectorTransfers.cpp
+++ b/mlir/lib/Transforms/LowerVectorTransfers.cpp
@@ -126,9 +126,9 @@ private:
/// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
/// // vector<32x256xf32> and pad with %f0 to handle the boundary case:
/// %f0 = constant 0.0f : f32
-/// for %i0 = 0 to %0 {
-/// for %i1 = 0 to %1 step 256 {
-/// for %i2 = 0 to %2 step 32 {
+/// affine.for %i0 = 0 to %0 {
+/// affine.for %i1 = 0 to %1 step 256 {
+/// affine.for %i2 = 0 to %2 step 32 {
/// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
/// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
/// (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@@ -139,8 +139,8 @@ private:
/// MLIR resembling:
///
/// ```mlir
-/// for %d1 = 0 to 256 {
-/// for %d2 = 0 to 32 {
+/// affine.for %d1 = 0 to 256 {
+/// affine.for %d2 = 0 to 32 {
/// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
/// %tmp[%d2, %d1] = %s
/// }
diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp
index be5a03bc416..4434ab5322e 100644
--- a/mlir/lib/Transforms/MaterializeVectors.cpp
+++ b/mlir/lib/Transforms/MaterializeVectors.cpp
@@ -101,10 +101,10 @@
/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
/// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
-/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 {
-/// for %i1 = 0 to %N step 4 {
-/// for %i2 = 0 to %O {
-/// for %i3 = 0 to %P step 4 {
+/// vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 {
+/// affine.for %i1 = 0 to %N step 4 {
+/// affine.for %i2 = 0 to %O {
+/// affine.for %i3 = 0 to %P step 4 {
/// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
/// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,
@@ -120,10 +120,10 @@
/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
/// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
-/// for %i0 = 0 to %arg0 step 4 {
-/// for %i1 = 0 to %arg1 step 4 {
-/// for %i2 = 0 to %arg2 {
-/// for %i3 = 0 to %arg3 step 4 {
+/// affine.for %i0 = 0 to %arg0 step 4 {
+/// affine.for %i1 = 0 to %arg1 step 4 {
+/// affine.for %i2 = 0 to %arg2 {
+/// affine.for %i3 = 0 to %arg3 step 4 {
/// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
/// (%i0, %i1, %i2, %i3)
/// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3
@@ -293,10 +293,10 @@ static Value *substitute(Value *v, VectorType hwVectorType,
/// super-vectorization has been applied:
///
/// ```mlir
-/// for %i0 = 0 to %M {
-/// for %i1 = 0 to %N step 3 {
-/// for %i2 = 0 to %O {
-/// for %i3 = 0 to %P step 32 {
+/// affine.for %i0 = 0 to %M {
+/// affine.for %i1 = 0 to %N step 3 {
+/// affine.for %i2 = 0 to %O {
+/// affine.for %i3 = 0 to %P step 32 {
/// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2)
/// -> vector<3x32xf32>
/// ...
diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
index d9f940a01f3..3141d748750 100644
--- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
+++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
@@ -19,7 +19,7 @@
// potentially getting rid of intermediate memref's entirely.
// TODO(mlir-team): In the future, similar techniques could be used to eliminate
// dead memref store's and perform more complex forwarding when support for
-// SSA scalars live out of 'for'/'if' statements is available.
+// SSA scalars live out of 'affine.for'/'if' statements is available.
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
@@ -55,7 +55,7 @@ namespace {
//
// (* A dependence being satisfied at a block: a dependence that is satisfied by
// virtue of the destination instruction appearing textually / lexically after
-// the source instruction within the body of a 'for' instruction; thus, a
+// the source instruction within the body of an 'affine.for' instruction; thus, a
// dependence is always either satisfied by a loop or by a block).
//
// The above conditions are simple to check, sufficient, and powerful for most
@@ -145,8 +145,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(OpPointer<LoadOp> loadOp) {
// Check if this store is a candidate for forwarding; we only forward if
// the dependence from the store is carried by the *body* of innermost
// common surrounding loop. As an example this filters out cases like:
- // for %i0
- // for %i1
+ // affine.for %i0
+ // affine.for %i1
// %idx = affine.apply (d0) -> (d0 + 1) (%i0)
// store %A[%idx]
// load %A[%i0]
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index cfa045f2279..84c8cd830dc 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -71,11 +71,11 @@ static unsigned getTagMemRefPos(const Instruction &dmaInst) {
return 0;
}
-/// Doubles the buffer of the supplied memref on the specified 'for' instruction
-/// by adding a leading dimension of size two to the memref. Replaces all uses
-/// of the old memref by the new one while indexing the newly added dimension by
-/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
-/// such a replacement cannot be performed.
+/// Doubles the buffer of the supplied memref on the specified 'affine.for'
+/// instruction by adding a leading dimension of size two to the memref.
+/// Replaces all uses of the old memref by the new one while indexing the newly
+/// added dimension by the loop IV of the specified 'affine.for' instruction
+/// modulo 2. Returns false if such a replacement cannot be performed.
static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
auto *forBody = forOp->getBody();
FuncBuilder bInner(forBody, forBody->begin());
@@ -108,7 +108,7 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
dynamicDimCount++));
}
- // Create and place the alloc right before the 'for' instruction.
+ // Create and place the alloc right before the 'affine.for' instruction.
// TODO(mlir-team): we are assuming scoped allocation here, and aren't
// inserting a dealloc -- this isn't the right thing.
Value *newMemRef =
@@ -137,9 +137,9 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
/// Returns success if the IR is in a valid state.
PassResult PipelineDataTransfer::runOnFunction(Function *f) {
// Do a post order walk so that inner loop DMAs are processed first. This is
- // necessary since 'for' instructions nested within would otherwise become
- // invalid (erased) when the outer loop is pipelined (the pipelined one gets
- // deleted and replaced by a prologue, a new steady-state loop and an
+ // necessary since 'affine.for' instructions nested within would otherwise
+ // become invalid (erased) when the outer loop is pipelined (the pipelined one
+ // gets deleted and replaced by a prologue, a new steady-state loop and an
// epilogue).
forOps.clear();
f->walkPostOrder<AffineForOp>(
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index a1903ace026..110949f43d5 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -138,8 +138,8 @@ void mlir::promoteSingleIterationLoops(Function *f) {
[](OpPointer<AffineForOp> forOp) { promoteIfSingleIteration(forOp); });
}
-/// Generates a 'for' inst with the specified lower and upper bounds while
-/// generating the right IV remappings for the shifted instructions. The
+/// Generates an 'affine.for' inst with the specified lower and upper bounds
+/// while generating the right IV remappings for the shifted instructions. The
/// instruction blocks that go into the loop are specified in instGroupQueue
/// starting from the specified offset, and in that order; the first element of
/// the pair specifies the shift applied to that group of instructions; note
@@ -194,10 +194,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
return loopChunk;
}
-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
-/// A shift of zero for each instruction will lead to no change.
+/// Skew the instructions in the body of an 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied. A shift of zero for each instruction will lead to no change.
// The skewing of instructions with respect to one another can be used for
// example to allow overlap of asynchronous operations (such as DMA
// communication) with computation, or just relative shifting of instructions
@@ -246,7 +246,7 @@ UtilResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,
// An array of instruction groups sorted by shift amount; each group has all
// instructions with the same shift in the order in which they appear in the
- // body of the 'for' inst.
+ // body of the 'affine.for' inst.
std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
unsigned pos = 0;
for (auto &inst : *forOp->getBody()) {
diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp
index 41689be52fc..90d28bf34df 100644
--- a/mlir/lib/Transforms/Utils/Utils.cpp
+++ b/mlir/lib/Transforms/Utils/Utils.cpp
@@ -194,14 +194,14 @@ bool mlir::replaceAllMemRefUsesWith(const Value *oldMemRef, Value *newMemRef,
///
/// Before
///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// "send"(%idx, %A, ...)
/// "compute"(%idx)
///
/// After
///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// "send"(%idx, %A, ...)
/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp
index 5a8d5d24661..1f4c7b9fcc8 100644
--- a/mlir/lib/Transforms/Vectorize.cpp
+++ b/mlir/lib/Transforms/Vectorize.cpp
@@ -113,7 +113,7 @@ using namespace mlir;
///
/// At a high level, a vectorized load in a loop will resemble:
/// ```mlir
-/// for %i = ? to ? step ? {
+/// affine.for %i = ? to ? step ? {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@@ -309,7 +309,7 @@ using namespace mlir;
/// ```mlir
/// mlfunc @fill(%A : memref<128xf32>) -> () {
/// %f1 = constant 1.0 : f32
-/// for %i0 = 0 to 32 {
+/// affine.for %i0 = 0 to 32 {
/// store %f1, %A[%i0] : memref<128xf32, 0>
/// }
/// return
@@ -322,7 +322,7 @@ using namespace mlir;
/// is still subject to exploratory tradeoffs. In particular, say we want to
/// vectorize by a factor 128, we want to transform the following input:
/// ```mlir
-/// for %i = %M to %N {
+/// affine.for %i = %M to %N {
/// %a = load A[%i] : memref<?xf32>
/// }
/// ```
@@ -331,8 +331,8 @@ using namespace mlir;
/// memory promotion etc) say after stripmining (and potentially unrolling in
/// the case of LLVM's SLP vectorizer):
/// ```mlir
-/// for %i = floor(%M, 128) to ceil(%N, 128) {
-/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
+/// affine.for %i = floor(%M, 128) to ceil(%N, 128) {
+/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
/// %a = load A[%ii] : memref<?xf32>
/// }
/// }
@@ -341,7 +341,7 @@ using namespace mlir;
/// Instead, we seek to vectorize early and freeze vector types before
/// scheduling, so we want to generate a pattern that resembles:
/// ```mlir
-/// for %i = ? to ? step ? {
+/// affine.for %i = ? to ? step ? {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@@ -362,7 +362,7 @@ using namespace mlir;
/// For the simple strawman example above, vectorizing for a 1-D vector
/// abstraction of size 128 returns code similar to:
/// ```mlir
-/// for %i = %M to %N step 128 {
+/// affine.for %i = %M to %N step 128 {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@@ -391,20 +391,20 @@ using namespace mlir;
/// %C = alloc (%M, %N) : memref<?x?xf32, 0>
/// %f1 = constant 1.0 : f32
/// %f2 = constant 2.0 : f32
-/// for %i0 = 0 to %M {
-/// for %i1 = 0 to %N {
+/// affine.for %i0 = 0 to %M {
+/// affine.for %i1 = 0 to %N {
/// // non-scoped %f1
/// store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
/// }
/// }
-/// for %i2 = 0 to %M {
-/// for %i3 = 0 to %N {
+/// affine.for %i2 = 0 to %M {
+/// affine.for %i3 = 0 to %N {
/// // non-scoped %f2
/// store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
/// }
/// }
-/// for %i4 = 0 to %M {
-/// for %i5 = 0 to %N {
+/// affine.for %i4 = 0 to %M {
+/// affine.for %i5 = 0 to %N {
/// %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
/// %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
/// %s5 = addf %a5, %b5 : f32
@@ -438,24 +438,24 @@ using namespace mlir;
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
/// %cst = constant 1.0 : f32
/// %cst_0 = constant 2.0 : f32
-/// for %i0 = 0 to %arg0 {
-/// for %i1 = 0 to %arg1 step 256 {
+/// affine.for %i0 = 0 to %arg0 {
+/// affine.for %i1 = 0 to %arg1 step 256 {
/// %cst_1 = constant splat<vector<256xf32>, 1.0> :
/// vector<256xf32>
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
-/// for %i2 = 0 to %arg0 {
-/// for %i3 = 0 to %arg1 step 256 {
+/// affine.for %i2 = 0 to %arg0 {
+/// affine.for %i3 = 0 to %arg1 step 256 {
/// %cst_2 = constant splat<vector<256xf32>, 2.0> :
/// vector<256xf32>
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
-/// for %i4 = 0 to %arg0 {
-/// for %i5 = 0 to %arg1 step 256 {
+/// affine.for %i4 = 0 to %arg0 {
+/// affine.for %i5 = 0 to %arg1 step 256 {
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
/// (memref<?x?xf32>, index, index) -> vector<256xf32>
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
@@ -494,24 +494,24 @@ using namespace mlir;
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
/// %cst = constant 1.0 : f32
/// %cst_0 = constant 2.0 : f32
-/// for %i0 = 0 to %arg0 step 32 {
-/// for %i1 = 0 to %arg1 step 256 {
+/// affine.for %i0 = 0 to %arg0 step 32 {
+/// affine.for %i1 = 0 to %arg1 step 256 {
/// %cst_1 = constant splat<vector<32x256xf32>, 1.0> :
/// vector<32x256xf32>
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
-/// for %i2 = 0 to %arg0 step 32 {
-/// for %i3 = 0 to %arg1 step 256 {
+/// affine.for %i2 = 0 to %arg0 step 32 {
+/// affine.for %i3 = 0 to %arg1 step 256 {
/// %cst_2 = constant splat<vector<32x256xf32>, 2.0> :
/// vector<32x256xf32>
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
-/// for %i4 = 0 to %arg0 step 32 {
-/// for %i5 = 0 to %arg1 step 256 {
+/// affine.for %i4 = 0 to %arg0 step 32 {
+/// affine.for %i5 = 0 to %arg1 step 256 {
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
/// (memref<?x?xf32>, index, index) -> vector<32x256xf32>
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :