| field | value | date |
|---|---|---|
| author | River Riddle <riverriddle@google.com> | 2019-03-25 10:14:34 -0700 |
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 17:39:03 -0700 |
| commit | 832567b3799f763ec3ba9480e1628c5a3de7fa6e (patch) | |
| tree | e0b890af7d7f1c309399ba282ac2bace75a0b21f /mlir/lib | |
| parent | 8f5fa566239f88f5b0f3435103b3dc028b0a672a (diff) | |
| download | bcm5719-llvm-832567b3799f763ec3ba9480e1628c5a3de7fa6e.tar.gz, bcm5719-llvm-832567b3799f763ec3ba9480e1628c5a3de7fa6e.zip | |
NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for' and set the namespace of the AffineOps dialect to 'affine'.
PiperOrigin-RevId: 240165792
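The user-visible effect of the rename shows up in the textual IR: the loop op now carries the dialect namespace, matching `affine.apply` and `affine.if`, which already did. A minimal before/after sketch (bounds and memref shape chosen arbitrarily for illustration, mirroring the examples updated throughout this patch):

```mlir
// Before this change: the AffineOps loop printed with a bare keyword.
for %i0 = 0 to 100 step 2 {
  %v = load %A[%i0] : memref<100xf32>
}

// After this change: the dialect namespace is "affine", so the same loop
// prints and parses as 'affine.for'.
affine.for %i0 = 0 to 100 step 2 {
  %v = load %A[%i0] : memref<100xf32>
}
```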
Diffstat (limited to 'mlir/lib')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | mlir/lib/AffineOps/AffineOps.cpp | 6 |
| -rw-r--r-- | mlir/lib/Analysis/AffineAnalysis.cpp | 8 |
| -rw-r--r-- | mlir/lib/Analysis/TestParallelismDetection.cpp | 5 |
| -rw-r--r-- | mlir/lib/Analysis/Utils.cpp | 10 |
| -rw-r--r-- | mlir/lib/EDSC/MLIREmitter.cpp | 3 |
| -rw-r--r-- | mlir/lib/EDSC/Types.cpp | 6 |
| -rw-r--r-- | mlir/lib/Transforms/DmaGeneration.cpp | 11 |
| -rw-r--r-- | mlir/lib/Transforms/LoopFusion.cpp | 3 |
| -rw-r--r-- | mlir/lib/Transforms/LoopTiling.cpp | 3 |
| -rw-r--r-- | mlir/lib/Transforms/LoopUnroll.cpp | 4 |
| -rw-r--r-- | mlir/lib/Transforms/LoopUnrollAndJam.cpp | 2 |
| -rw-r--r-- | mlir/lib/Transforms/LowerAffine.cpp | 7 |
| -rw-r--r-- | mlir/lib/Transforms/LowerVectorTransfers.cpp | 10 |
| -rw-r--r-- | mlir/lib/Transforms/MaterializeVectors.cpp | 24 |
| -rw-r--r-- | mlir/lib/Transforms/MemRefDataFlowOpt.cpp | 8 |
| -rw-r--r-- | mlir/lib/Transforms/PipelineDataTransfer.cpp | 18 |
| -rw-r--r-- | mlir/lib/Transforms/Utils/LoopUtils.cpp | 18 |
| -rw-r--r-- | mlir/lib/Transforms/Utils/Utils.cpp | 4 |
| -rw-r--r-- | mlir/lib/Transforms/Vectorize.cpp | 50 |
19 files changed, 103 insertions, 97 deletions
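Most of the diff below is mechanical: passes, utilities, and doc comments that spelled the loop op as 'for' now spell it 'affine.for', while 'affine.apply' and 'affine.if' are untouched because they already carried the prefix. For reference, a small loop nest in the new syntax, adapted as a sketch from the comment examples in this patch rather than copied from any one file:

```mlir
affine.for %i0 = 0 to 100 {
  affine.for %i1 = 0 to 50 {
    // 'affine.apply' already used the 'affine.' namespace before this change.
    %idx = affine.apply (d0, d1) -> (d0 + d1) (%i0, %i1)
    %v = load %A[%idx] : memref<150xf32>
  }
}
```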
diff --git a/mlir/lib/AffineOps/AffineOps.cpp b/mlir/lib/AffineOps/AffineOps.cpp index 4badde9012b..92035489e21 100644 --- a/mlir/lib/AffineOps/AffineOps.cpp +++ b/mlir/lib/AffineOps/AffineOps.cpp @@ -36,7 +36,7 @@ using llvm::dbgs; //===----------------------------------------------------------------------===// AffineOpsDialect::AffineOpsDialect(MLIRContext *context) - : Dialect(/*namePrefix=*/"", context) { + : Dialect(/*namePrefix=*/"affine", context) { addOperations<AffineApplyOp, AffineForOp, AffineIfOp>(); } @@ -69,7 +69,7 @@ bool mlir::isValidDim(Value *value) { return isTopLevelSymbol(dimOp->getOperand()); return false; } - // This value is a block argument (which also includes 'for' loop IVs). + // This value is a block argument (which also includes 'affine.for' loop IVs). return true; } @@ -969,7 +969,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) { } void AffineForOp::print(OpAsmPrinter *p) { - *p << "for "; + *p << "affine.for "; p->printOperand(getBody()->getArgument(0)); *p << " = "; printBound(getLowerBound(), "max", p); diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp index f786731e88a..e2e9ef68b17 100644 --- a/mlir/lib/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Analysis/AffineAnalysis.cpp @@ -708,8 +708,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const { // For example, given the following MLIR code with with "source" and // "destination" accesses to the same memref labled, and symbols %M, %N, %K: // -// for %i0 = 0 to 100 { -// for %i1 = 0 to 50 { +// affine.for %i0 = 0 to 100 { +// affine.for %i1 = 0 to 50 { // %a0 = affine.apply // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N] // // Source memref access. @@ -717,8 +717,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const { // } // } // -// for %i2 = 0 to 100 { -// for %i3 = 0 to 50 { +// affine.for %i2 = 0 to 100 { +// affine.for %i3 = 0 to 50 { // %a1 = affine.apply // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M] // // Destination memref access. diff --git a/mlir/lib/Analysis/TestParallelismDetection.cpp b/mlir/lib/Analysis/TestParallelismDetection.cpp index b954f0e67d9..7ed59b403cd 100644 --- a/mlir/lib/Analysis/TestParallelismDetection.cpp +++ b/mlir/lib/Analysis/TestParallelismDetection.cpp @@ -15,7 +15,7 @@ // limitations under the License. // ============================================================================= // -// This file implements a pass to detect parallel affine 'for' ops. +// This file implements a pass to detect parallel affine 'affine.for' ops. // //===----------------------------------------------------------------------===// @@ -40,7 +40,8 @@ FunctionPassBase *mlir::createParallelismDetectionTestPass() { return new TestParallelismDetection(); } -// Walks the function and emits a note for all 'for' ops detected as parallel. +// Walks the function and emits a note for all 'affine.for' ops detected as +// parallel. void TestParallelismDetection::runOnFunction() { Function *f = getFunction(); FuncBuilder b(f); diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index 5a6e1f84b35..6bc395c46bd 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -38,11 +38,11 @@ using namespace mlir; using llvm::SmallDenseMap; /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from -/// the outermost 'for' instruction to the innermost one. 
+/// the outermost 'affine.for' instruction to the innermost one. void mlir::getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops) { auto *currInst = inst.getParentInst(); AffineForOp currAffineForOp; - // Traverse up the hierarchy collecing all 'for' instruction while + // Traverse up the hierarchy collecing all 'affine.for' instruction while // skipping over 'affine.if' instructions. while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) || currInst->isa<AffineIfOp>())) { @@ -162,8 +162,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) { // For example, the memref region for this load operation at loopDepth = 1 will // be as below: // -// for %i = 0 to 32 { -// for %ii = %i to (d0) -> (d0 + 8) (%i) { +// affine.for %i = 0 to 32 { +// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { // load %A[%ii] // } // } @@ -683,7 +683,7 @@ static Optional<int64_t> getMemoryFootprintBytes(Block &block, int memorySpace) { SmallDenseMap<Value *, std::unique_ptr<MemRefRegion>, 4> regions; - // Walk this 'for' instruction to gather all memory regions. + // Walk this 'affine.for' instruction to gather all memory regions. bool error = false; block.walk(start, end, [&](Instruction *opInst) { if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) { diff --git a/mlir/lib/EDSC/MLIREmitter.cpp b/mlir/lib/EDSC/MLIREmitter.cpp index 1196748a0af..89c66b08941 100644 --- a/mlir/lib/EDSC/MLIREmitter.cpp +++ b/mlir/lib/EDSC/MLIREmitter.cpp @@ -146,7 +146,8 @@ Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) { if (auto expr = e.dyn_cast<StmtBlockLikeExpr>()) { if (expr.getKind() == ExprKind::For) { auto exprGroups = expr.getAllArgumentGroups(); - assert(exprGroups.size() == 3 && "expected 3 expr groups in `for`"); + assert(exprGroups.size() == 3 && + "expected 3 expr groups in `affine.for`"); assert(!exprGroups[0].empty() && "expected at least one lower bound"); assert(!exprGroups[1].empty() && "expected at least one upper bound"); assert(exprGroups[2].size() == 1 && diff --git a/mlir/lib/EDSC/Types.cpp b/mlir/lib/EDSC/Types.cpp index 72c453718e7..ac8b98e38c3 100644 --- a/mlir/lib/EDSC/Types.cpp +++ b/mlir/lib/EDSC/Types.cpp @@ -526,8 +526,8 @@ Stmt mlir::edsc::For(llvm::ArrayRef<Expr> idxs, llvm::ArrayRef<MaxExpr> lbs, Stmt mlir::edsc::MaxMinFor(const Bindable &idx, ArrayRef<Expr> lbs, ArrayRef<Expr> ubs, Expr step, ArrayRef<Stmt> enclosedStmts) { - assert(!lbs.empty() && "'for' loop must have lower bounds"); - assert(!ubs.empty() && "'for' loop must have upper bounds"); + assert(!lbs.empty() && "'affine.for' loop must have lower bounds"); + assert(!ubs.empty() && "'affine.for' loop must have upper bounds"); // Use a null expression as a sentinel between lower and upper bound // expressions in the list of children. @@ -964,7 +964,7 @@ void mlir::edsc::Expr::print(raw_ostream &os) const { } else if (auto stmtLikeExpr = this->dyn_cast<StmtBlockLikeExpr>()) { switch (stmtLikeExpr.getKind()) { // We only print the lb, ub and step here, which are the StmtBlockLike - // part of the `for` StmtBlockLikeExpr. + // part of the `affine.for` StmtBlockLikeExpr. 
case ExprKind::For: { auto exprGroups = stmtLikeExpr.getAllArgumentGroups(); assert(exprGroups.size() == 3 && diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp index 7f8c7e411e8..4fa040d73eb 100644 --- a/mlir/lib/Transforms/DmaGeneration.cpp +++ b/mlir/lib/Transforms/DmaGeneration.cpp @@ -343,7 +343,7 @@ bool DmaGeneration::generateDma(const MemRefRegion ®ion, Block *block, auto fastMemRefType = top.getMemRefType( fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace); - // Create the fast memory space buffer just before the 'for' + // Create the fast memory space buffer just before the 'affine.for' // instruction. fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult(); // Record it. @@ -472,7 +472,7 @@ bool DmaGeneration::runOnBlock(Block *block) { // approach is conservative in some cases at the moment, we do a check later // and report an error with location info. // TODO(bondhugula): An 'affine.if' instruction is being treated similar to an - // operation instruction. 'affine.if''s could have 'for's in them; + // operation instruction. 'affine.if''s could have 'affine.for's in them; // treat them separately. // Get to the first load, store, or for op. @@ -494,7 +494,7 @@ bool DmaGeneration::runOnBlock(Block *block) { fastMemCapacityBytes); }; - // If the memory footprint of the 'for' loop is higher than fast + // If the memory footprint of the 'affine.for' loop is higher than fast // memory capacity (when provided), we recurse to DMA at an inner level // until we find a depth at which footprint fits in fast mem capacity. If // the footprint can't be calculated, we assume for now it fits. Recurse @@ -507,7 +507,7 @@ bool DmaGeneration::runOnBlock(Block *block) { runOnBlock(/*begin=*/curBegin, /*end=*/it); // Recurse onto the body of this loop. runOnBlock(forOp->getBody()); - // The next region starts right after the 'for' instruction. + // The next region starts right after the 'affine.for' instruction. curBegin = std::next(it); } else { // We have enough capacity, i.e., DMAs will be computed for the portion @@ -698,7 +698,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) { [&](const SmallMapVector<Value *, std::unique_ptr<MemRefRegion>, 4> ®ions) { for (const auto ®ionEntry : regions) { - // For each region, hoist DMA transfer past all invariant 'for's. + // For each region, hoist DMA transfer past all invariant + // 'affine.for's. Block::iterator dmaPlacementReadStart, dmaPlacementWriteStart; Block *dmaPlacementBlock; findHighestBlockForPlacement( diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 8e1fc505348..84644bf11a0 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -641,7 +641,8 @@ bool MemRefDependenceGraph::init(Function *f) { // all loads and store accesses it contains. LoopNestStateCollector collector; collector.collect(&inst); - // Return false if a non 'for' region was found (not currently supported). + // Return false if a non 'affine.for' region was found (not currently + // supported). 
if (collector.hasNonForRegion) return false; Node node(nextNodeId++, &inst); diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp index 0b629531df0..314864d3f3c 100644 --- a/mlir/lib/Transforms/LoopTiling.cpp +++ b/mlir/lib/Transforms/LoopTiling.cpp @@ -258,7 +258,8 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band, // Returns all maximal outermost perfect loop nests to tile. static void getTileableBands(Function *f, std::vector<SmallVector<AffineForOp, 6>> *bands) { - // Get maximal perfect nest of 'for' insts starting from root (inclusive). + // Get maximal perfect nest of 'affine.for' insts starting from root + // (inclusive). auto getMaximalPerfectLoopNest = [&](AffineForOp root) { SmallVector<AffineForOp, 6> band; AffineForOp currInst = root; diff --git a/mlir/lib/Transforms/LoopUnroll.cpp b/mlir/lib/Transforms/LoopUnroll.cpp index a16237e6452..173a171e589 100644 --- a/mlir/lib/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Transforms/LoopUnroll.cpp @@ -158,8 +158,8 @@ void LoopUnroll::runOnFunction() { } } -/// Unrolls a 'for' inst. Returns success if the loop was unrolled, failure -/// otherwise. The default unroll factor is 4. +/// Unrolls a 'affine.for' inst. Returns success if the loop was unrolled, +/// failure otherwise. The default unroll factor is 4. LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { // Use the function callback if one was provided. if (getUnrollFactor) { diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp index 03c06b4b450..240f3960488 100644 --- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp +++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp @@ -96,7 +96,7 @@ void LoopUnrollAndJam::runOnFunction() { runOnAffineForOp(forOp); } -/// Unroll and jam a 'for' inst. Default unroll jam factor is +/// Unroll and jam a 'affine.for' inst. Default unroll jam factor is /// kDefaultUnrollJamFactor. Return failure if nothing was done. LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) { // Unroll and jam by the factor that was passed if any. diff --git a/mlir/lib/Transforms/LowerAffine.cpp b/mlir/lib/Transforms/LowerAffine.cpp index 3061bcd254d..cb65720cee3 100644 --- a/mlir/lib/Transforms/LowerAffine.cpp +++ b/mlir/lib/Transforms/LowerAffine.cpp @@ -276,7 +276,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate, return value; } -// Convert a "for" loop to a flow of blocks. Return `false` on success. +// Convert a "affine.for" loop to a flow of blocks. Return `false` on success. // // Create an SESE region for the loop (including its body) and append it to the // end of the current region. The loop region consists of the initialization @@ -323,8 +323,9 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) { auto loc = forOp->getLoc(); auto *forInst = forOp->getInstruction(); - // Start by splitting the block containing the 'for' into two parts. The part - // before will get the init code, the part after will be the end point. + // Start by splitting the block containing the 'affine.for' into two parts. + // The part before will get the init code, the part after will be the end + // point. 
auto *initBlock = forInst->getBlock(); auto *endBlock = initBlock->splitBlock(forInst); diff --git a/mlir/lib/Transforms/LowerVectorTransfers.cpp b/mlir/lib/Transforms/LowerVectorTransfers.cpp index cde28c6517d..7f6be358189 100644 --- a/mlir/lib/Transforms/LowerVectorTransfers.cpp +++ b/mlir/lib/Transforms/LowerVectorTransfers.cpp @@ -56,9 +56,9 @@ /// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into /// // vector<32x256xf32> and pad with %f0 to handle the boundary case: /// %f0 = constant 0.0f : f32 -/// for %i0 = 0 to %0 { -/// for %i1 = 0 to %1 step 256 { -/// for %i2 = 0 to %2 step 32 { +/// affine.for %i0 = 0 to %0 { +/// affine.for %i1 = 0 to %1 step 256 { +/// affine.for %i2 = 0 to %2 step 32 { /// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} : /// (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32> @@ -70,8 +70,8 @@ /// abstraction): /// /// ```mlir {.mlir} -/// for %d2 = 0 to 256 { -/// for %d1 = 0 to 32 { +/// affine.for %d2 = 0 to 256 { +/// affine.for %d1 = 0 to 32 { /// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32 /// %tmp[%d2, %d1] = %s /// } diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp index 0a7eaabbb09..ebdb0c8e83e 100644 --- a/mlir/lib/Transforms/MaterializeVectors.cpp +++ b/mlir/lib/Transforms/MaterializeVectors.cpp @@ -100,10 +100,10 @@ /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> /// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : -/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 { -/// for %i1 = 0 to %N step 4 { -/// for %i2 = 0 to %O { -/// for %i3 = 0 to %P step 4 { +/// vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 { +/// affine.for %i1 = 0 to %N step 4 { +/// affine.for %i2 = 0 to %O { +/// affine.for %i3 = 0 to %P step 4 { /// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 /// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : /// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, @@ -119,10 +119,10 @@ /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> /// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32> -/// for %i0 = 0 to %arg0 step 4 { -/// for %i1 = 0 to %arg1 step 4 { -/// for %i2 = 0 to %arg2 { -/// for %i3 = 0 to %arg3 step 4 { +/// affine.for %i0 = 0 to %arg0 step 4 { +/// affine.for %i1 = 0 to %arg1 step 4 { +/// affine.for %i2 = 0 to %arg2 { +/// affine.for %i3 = 0 to %arg3 step 4 { /// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3) /// (%i0, %i1, %i2, %i3) /// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3 @@ -286,10 +286,10 @@ static Value *substitute(Value *v, VectorType hwVectorType, /// super-vectorization has been applied: /// /// ```mlir -/// for %i0 = 0 to %M { -/// for %i1 = 0 to %N step 3 { -/// for %i2 = 0 to %O { -/// for %i3 = 0 to %P step 32 { +/// affine.for %i0 = 0 to %M { +/// affine.for %i1 = 0 to %N step 3 { +/// affine.for %i2 = 0 to %O { +/// affine.for %i3 = 0 to %P step 32 { /// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2) /// -> vector<3x32xf32> /// ... diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp index a35a159443d..a7045b3b541 100644 --- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp +++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp @@ -19,7 +19,7 @@ // potentially getting rid of intermediate memref's entirely. 
// TODO(mlir-team): In the future, similar techniques could be used to eliminate // dead memref store's and perform more complex forwarding when support for -// SSA scalars live out of 'for'/'affine.if' statements is available. +// SSA scalars live out of 'affine.for'/'affine.if' statements is available. //===----------------------------------------------------------------------===// #include "mlir/Analysis/AffineAnalysis.h" @@ -55,7 +55,7 @@ namespace { // // (* A dependence being satisfied at a block: a dependence that is satisfied by // virtue of the destination instruction appearing textually / lexically after -// the source instruction within the body of a 'for' instruction; thus, a +// the source instruction within the body of a 'affine.for' instruction; thus, a // dependence is always either satisfied by a loop or by a block). // // The above conditions are simple to check, sufficient, and powerful for most @@ -139,8 +139,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) { // Check if this store is a candidate for forwarding; we only forward if // the dependence from the store is carried by the *body* of innermost // common surrounding loop. As an example this filters out cases like: - // for %i0 - // for %i1 + // affine.for %i0 + // affine.for %i1 // %idx = affine.apply (d0) -> (d0 + 1) (%i0) // store %A[%idx] // load %A[%i0] diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp index 9809a146072..b59071aa9fe 100644 --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -66,11 +66,11 @@ static unsigned getTagMemRefPos(Instruction &dmaInst) { return 0; } -/// Doubles the buffer of the supplied memref on the specified 'for' instruction -/// by adding a leading dimension of size two to the memref. Replaces all uses -/// of the old memref by the new one while indexing the newly added dimension by -/// the loop IV of the specified 'for' instruction modulo 2. Returns false if -/// such a replacement cannot be performed. +/// Doubles the buffer of the supplied memref on the specified 'affine.for' +/// instruction by adding a leading dimension of size two to the memref. +/// Replaces all uses of the old memref by the new one while indexing the newly +/// added dimension by the loop IV of the specified 'affine.for' instruction +/// modulo 2. Returns false if such a replacement cannot be performed. static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { auto *forBody = forOp->getBody(); FuncBuilder bInner(forBody, forBody->begin()); @@ -104,7 +104,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { dynamicDimCount++)); } - // Create and place the alloc right before the 'for' instruction. + // Create and place the alloc right before the 'affine.for' instruction. Value *newMemRef = bOuter.create<AllocOp>(forInst->getLoc(), newMemRefType, allocOperands); @@ -139,9 +139,9 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { /// Returns success if the IR is in a valid state. void PipelineDataTransfer::runOnFunction() { // Do a post order walk so that inner loop DMAs are processed first. 
This is - // necessary since 'for' instructions nested within would otherwise become - // invalid (erased) when the outer loop is pipelined (the pipelined one gets - // deleted and replaced by a prologue, a new steady-state loop and an + // necessary since 'affine.for' instructions nested within would otherwise + // become invalid (erased) when the outer loop is pipelined (the pipelined one + // gets deleted and replaced by a prologue, a new steady-state loop and an // epilogue). forOps.clear(); getFunction()->walkPostOrder<AffineForOp>( diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index e5f1fef990f..bf0c3ced2e2 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -71,7 +71,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, auto lb = b->create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands); // For each upper bound expr, get the range. - // Eg: for %i = lb to min (ub1, ub2), + // Eg: affine.for %i = lb to min (ub1, ub2), // where tripCountExprs yield (tr1, tr2), we create affine.apply's: // lb + tr1 - tr1 % ufactor, lb + tr2 - tr2 % ufactor; the results of all // these affine.apply's make up the cleanup loop lower bound. @@ -161,8 +161,8 @@ void mlir::promoteSingleIterationLoops(Function *f) { [](AffineForOp forOp) { promoteIfSingleIteration(forOp); }); } -/// Generates a 'for' inst with the specified lower and upper bounds while -/// generating the right IV remappings for the shifted instructions. The +/// Generates a 'affine.for' inst with the specified lower and upper bounds +/// while generating the right IV remappings for the shifted instructions. The /// instruction blocks that go into the loop are specified in instGroupQueue /// starting from the specified offset, and in that order; the first element of /// the pair specifies the shift applied to that group of instructions; note @@ -216,10 +216,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap, return loopChunk; } -/// Skew the instructions in the body of a 'for' instruction with the specified -/// instruction-wise shifts. The shifts are with respect to the original -/// execution order, and are multiplied by the loop 'step' before being applied. -/// A shift of zero for each instruction will lead to no change. +/// Skew the instructions in the body of a 'affine.for' instruction with the +/// specified instruction-wise shifts. The shifts are with respect to the +/// original execution order, and are multiplied by the loop 'step' before being +/// applied. A shift of zero for each instruction will lead to no change. // The skewing of instructions with respect to one another can be used for // example to allow overlap of asynchronous operations (such as DMA // communication) with computation, or just relative shifting of instructions @@ -267,7 +267,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts, // An array of instruction groups sorted by shift amount; each group has all // instructions with the same shift in the order in which they appear in the - // body of the 'for' inst. + // body of the 'affine.for' inst. std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1); unsigned pos = 0; for (auto &inst : *forOp->getBody()) { @@ -499,7 +499,7 @@ void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) { // bounds, the resulting IR resembles: // // ```mlir -// for %i = max (`iv, ...) to min (`iv` + `offset`) { +// affine.for %i = max (`iv, ...) 
to min (`iv` + `offset`) { // ... // } // ``` diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp index 7bf9993b7c8..7a44a6277a6 100644 --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ b/mlir/lib/Transforms/Utils/Utils.cpp @@ -199,14 +199,14 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, /// /// Before /// -/// for %i = 0 to #map(%N) +/// affine.for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// "send"(%idx, %A, ...) /// "compute"(%idx) /// /// After /// -/// for %i = 0 to #map(%N) +/// affine.for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// "send"(%idx, %A, ...) /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i) diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp index 955e38f4b39..a52129ed0d6 100644 --- a/mlir/lib/Transforms/Vectorize.cpp +++ b/mlir/lib/Transforms/Vectorize.cpp @@ -113,7 +113,7 @@ using namespace mlir; /// /// At a high level, a vectorized load in a loop will resemble: /// ```mlir -/// for %i = ? to ? step ? { +/// affine.for %i = ? to ? step ? { /// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) -> /// vector<128xf32> /// } @@ -309,7 +309,7 @@ using namespace mlir; /// ```mlir /// mlfunc @fill(%A : memref<128xf32>) -> () { /// %f1 = constant 1.0 : f32 -/// for %i0 = 0 to 32 { +/// affine.for %i0 = 0 to 32 { /// store %f1, %A[%i0] : memref<128xf32, 0> /// } /// return @@ -322,7 +322,7 @@ using namespace mlir; /// is still subject to exploratory tradeoffs. In particular, say we want to /// vectorize by a factor 128, we want to transform the following input: /// ```mlir -/// for %i = %M to %N { +/// affine.for %i = %M to %N { /// %a = load A[%i] : memref<?xf32> /// } /// ``` @@ -331,8 +331,8 @@ using namespace mlir; /// memory promotion etc) say after stripmining (and potentially unrolling in /// the case of LLVM's SLP vectorizer): /// ```mlir -/// for %i = floor(%M, 128) to ceil(%N, 128) { -/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) { +/// affine.for %i = floor(%M, 128) to ceil(%N, 128) { +/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) { /// %a = load A[%ii] : memref<?xf32> /// } /// } @@ -341,7 +341,7 @@ using namespace mlir; /// Instead, we seek to vectorize early and freeze vector types before /// scheduling, so we want to generate a pattern that resembles: /// ```mlir -/// for %i = ? to ? step ? { +/// affine.for %i = ? to ? step ? 
{ /// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) -> /// vector<128xf32> /// } @@ -362,7 +362,7 @@ using namespace mlir; /// For the simple strawman example above, vectorizing for a 1-D vector /// abstraction of size 128 returns code similar to: /// ```mlir -/// for %i = %M to %N step 128 { +/// affine.for %i = %M to %N step 128 { /// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) -> /// vector<128xf32> /// } @@ -391,20 +391,20 @@ using namespace mlir; /// %C = alloc (%M, %N) : memref<?x?xf32, 0> /// %f1 = constant 1.0 : f32 /// %f2 = constant 2.0 : f32 -/// for %i0 = 0 to %M { -/// for %i1 = 0 to %N { +/// affine.for %i0 = 0 to %M { +/// affine.for %i1 = 0 to %N { /// // non-scoped %f1 /// store %f1, %A[%i0, %i1] : memref<?x?xf32, 0> /// } /// } -/// for %i2 = 0 to %M { -/// for %i3 = 0 to %N { +/// affine.for %i2 = 0 to %M { +/// affine.for %i3 = 0 to %N { /// // non-scoped %f2 /// store %f2, %B[%i2, %i3] : memref<?x?xf32, 0> /// } /// } -/// for %i4 = 0 to %M { -/// for %i5 = 0 to %N { +/// affine.for %i4 = 0 to %M { +/// affine.for %i5 = 0 to %N { /// %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0> /// %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0> /// %s5 = addf %a5, %b5 : f32 @@ -438,24 +438,24 @@ using namespace mlir; /// %2 = alloc(%arg0, %arg1) : memref<?x?xf32> /// %cst = constant 1.0 : f32 /// %cst_0 = constant 2.0 : f32 -/// for %i0 = 0 to %arg0 { -/// for %i1 = 0 to %arg1 step 256 { +/// affine.for %i0 = 0 to %arg0 { +/// affine.for %i1 = 0 to %arg1 step 256 { /// %cst_1 = constant splat<vector<256xf32>, 1.0> : /// vector<256xf32> /// "vector_transfer_write"(%cst_1, %0, %i0, %i1) : /// (vector<256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// for %i2 = 0 to %arg0 { -/// for %i3 = 0 to %arg1 step 256 { +/// affine.for %i2 = 0 to %arg0 { +/// affine.for %i3 = 0 to %arg1 step 256 { /// %cst_2 = constant splat<vector<256xf32>, 2.0> : /// vector<256xf32> /// "vector_transfer_write"(%cst_2, %1, %i2, %i3) : /// (vector<256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// for %i4 = 0 to %arg0 { -/// for %i5 = 0 to %arg1 step 256 { +/// affine.for %i4 = 0 to %arg0 { +/// affine.for %i5 = 0 to %arg1 step 256 { /// %3 = "vector_transfer_read"(%0, %i4, %i5) : /// (memref<?x?xf32>, index, index) -> vector<256xf32> /// %4 = "vector_transfer_read"(%1, %i4, %i5) : @@ -494,24 +494,24 @@ using namespace mlir; /// %2 = alloc(%arg0, %arg1) : memref<?x?xf32> /// %cst = constant 1.0 : f32 /// %cst_0 = constant 2.0 : f32 -/// for %i0 = 0 to %arg0 step 32 { -/// for %i1 = 0 to %arg1 step 256 { +/// affine.for %i0 = 0 to %arg0 step 32 { +/// affine.for %i1 = 0 to %arg1 step 256 { /// %cst_1 = constant splat<vector<32x256xf32>, 1.0> : /// vector<32x256xf32> /// "vector_transfer_write"(%cst_1, %0, %i0, %i1) : /// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// for %i2 = 0 to %arg0 step 32 { -/// for %i3 = 0 to %arg1 step 256 { +/// affine.for %i2 = 0 to %arg0 step 32 { +/// affine.for %i3 = 0 to %arg1 step 256 { /// %cst_2 = constant splat<vector<32x256xf32>, 2.0> : /// vector<32x256xf32> /// "vector_transfer_write"(%cst_2, %1, %i2, %i3) : /// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// for %i4 = 0 to %arg0 step 32 { -/// for %i5 = 0 to %arg1 step 256 { +/// affine.for %i4 = 0 to %arg0 step 32 { +/// affine.for %i5 = 0 to %arg1 step 256 { /// %3 = "vector_transfer_read"(%0, %i4, %i5) : /// (memref<?x?xf32>, index, index) -> vector<32x256xf32> /// %4 = "vector_transfer_read"(%1, %i4, %i5) : |

