Introduce affine terminator

Due to legacy reasons (ML/CFG function separation), regions in affine control flow operations require contained blocks not to have terminators. This is inconsistent with the notion of the block and may complicate code motion between regions of affine control operations and other regions. Introduce `affine.terminator`, a special terminator operation that must be used to terminate blocks inside affine operations and transfers the control back to the region enclosing the affine operation. For brevity and readability reasons, allow `affine.for` and `affine.if` to omit the `affine.terminator` in their regions when using custom printing and parsing format. The custom parser injects the `affine.terminator` if it is missing so as to always have it present in constructed operations. Update transformations to account for the presence of the terminator. In particular, most code motion transformations between loops should leave the terminator in place, and code motion between loops and non-affine blocks should drop the terminator. PiperOrigin-RevId: 240536998
author: Alex Zinenko <zinenko@google.com> 2019-03-27 05:11:58 -0700
committer: jpienaar <jpienaar@google.com> 2019-03-29 17:44:24 -0700
commit: 5a5bba0279a5754c8e7aa2a9bf415aee2a0f1774 (patch)
tree: aa7553c11c35ffb030135528adb3bf47c54705cc /mlir/lib/Transforms
parent: af45236c70ed457fd093c88154a520db2d99f021 (diff)
download: bcm5719-llvm-5a5bba0279a5754c8e7aa2a9bf415aee2a0f1774.tar.gz
bcm5719-llvm-5a5bba0279a5754c8e7aa2a9bf415aee2a0f1774.zip
5 files changed, 73 insertions, 62 deletions
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index d76aca20b6d..8c29d1a76b4 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -1055,14 +1055,14 @@ computeLoopInterchangePermutation(ArrayRef<Instruction *> ops,
 // pushing loop carried dependence to a greater depth in the loop nest.
 static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
   assert(node->inst->isa<AffineForOp>());
-  // Get perfectly nested sequence of loops starting at root of loop nest.
+  // Get perfectly nested sequence of loops starting at root of loop nest
+  // (the first op being another AffineFor, and the second op - a terminator).
   // TODO(andydavis,bondhugula) Share this with similar code in loop tiling.
   SmallVector<AffineForOp, 4> loops;
   AffineForOp curr = node->inst->cast<AffineForOp>();
   loops.push_back(curr);
   auto *currBody = curr.getBody();
-  while (!currBody->empty() &&
-         std::next(currBody->begin()) == currBody->end() &&
+  while (currBody->begin() == std::prev(currBody->end(), 2) &&
          (curr = curr.getBody()->front().dyn_cast<AffineForOp>())) {
     loops.push_back(curr);
     currBody = curr.getBody();
diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp
index d9f74808ad8..c235190b4b7 100644
--- a/mlir/lib/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Transforms/LoopTiling.cpp
@@ -89,10 +89,12 @@ FunctionPassBase *mlir::createLoopTilingPass(uint64_t cacheSizeBytes) {
 }
 
 // Move the loop body of AffineForOp 'src' from 'src' into the specified
-// location in destination's body.
+// location in destination's body, ignoring the terminator.
 static inline void moveLoopBody(AffineForOp src, AffineForOp dest,
                                 Block::iterator loc) {
-  dest.getBody()->getOperations().splice(loc, src.getBody()->getOperations());
+  auto &insts = src.getBody()->getOperations();
+  dest.getBody()->getOperations().splice(loc, insts, insts.begin(),
+                                         std::prev(insts.end()));
 }
 
 // Move the loop body of AffineForOp 'src' from 'src' to the start of dest's
@@ -202,7 +204,6 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
     FuncBuilder b(topLoop);
     // Loop bounds will be set later.
     auto pointLoop = b.create<AffineForOp>(loc, 0, 0);
-    pointLoop.createBody();
     pointLoop.getBody()->getOperations().splice(
         pointLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
         topLoop);
@@ -217,7 +218,6 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
     FuncBuilder b(topLoop);
     // Loop bounds will be set later.
     auto tileSpaceLoop = b.create<AffineForOp>(loc, 0, 0);
-    tileSpaceLoop.createBody();
     tileSpaceLoop.getBody()->getOperations().splice(
         tileSpaceLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
         topLoop);
@@ -264,7 +264,7 @@ static void getTileableBands(Function &f,
     AffineForOp currInst = root;
     do {
       band.push_back(currInst);
-    } while (currInst.getBody()->getOperations().size() == 1 &&
+    } while (currInst.getBody()->getOperations().size() == 2 &&
              (currInst = currInst.getBody()->front().dyn_cast<AffineForOp>()));
     bands->push_back(band);
   };
diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
index 3fa4eab93da..3ea20c0c282 100644
--- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
@@ -125,7 +125,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
                                           uint64_t unrollJamFactor) {
   // Gathers all maximal sub-blocks of instructions that do not themselves
   // include a for inst (a instruction could have a descendant for inst though
-  // in its tree).
+  // in its tree).  Ignore the block terminators.
   struct JamBlockGatherer {
     // Store iterators to the first and last inst of each sub-block found.
     std::vector<std::pair<Block::iterator, Block::iterator>> subBlocks;
@@ -137,7 +137,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
           walk(block);
     }
     void walk(Block &block) {
-      for (auto it = block.begin(), e = block.end(); it != e;) {
+      for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
         auto subBlockStart = it;
         while (it != e && !it->isa<AffineForOp>())
           ++it;
@@ -155,7 +155,8 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
   if (unrollJamFactor == 1)
     return promoteIfSingleIteration(forOp);
 
-  if (forOp.getBody()->empty())
+  if (forOp.getBody()->empty() ||
+      forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
     return failure();
 
   // Loops where both lower and upper bounds are multi-result maps won't be
diff --git a/mlir/lib/Transforms/LowerAffine.cpp b/mlir/lib/Transforms/LowerAffine.cpp
index 5046bf2596b..acc9481e89c 100644
--- a/mlir/lib/Transforms/LowerAffine.cpp
+++ b/mlir/lib/Transforms/LowerAffine.cpp
@@ -335,14 +335,15 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
   conditionBlock->insertBefore(endBlock);
   auto *iv = conditionBlock->addArgument(IndexType::get(forInst->getContext()));
 
-  // Create the body block, moving the body of the forOp over to it.
+  // Create the body block, moving the body of the forOp over to it and dropping
+  // the affine terminator.
   auto *bodyBlock = new Block();
   bodyBlock->insertBefore(endBlock);
 
   auto *oldBody = forOp.getBody();
   bodyBlock->getOperations().splice(bodyBlock->begin(),
                                     oldBody->getOperations(), oldBody->begin(),
-                                    oldBody->end());
+                                    std::prev(oldBody->end()));
 
   // The code in the body of the forOp now uses 'iv' as its indvar.
   forOp.getInductionVar()->replaceAllUsesWith(iv);
@@ -406,7 +407,7 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
 // enabling easy nesting of "if" instructions and if-then-else-if chains.
 //
 //      +--------------------------------+
-//      | <code before the AffineIfOp>       |
+//      | <code before the AffineIfOp>   |
 //      | %zero = constant 0 : index     |
 //      | %v = affine.apply #expr1(%ops) |
 //      | %c = cmpi "sge" %v, %zero      |
@@ -450,7 +451,7 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
 //         v   v
 //      +--------------------------------+
 //      | continue:                      |
-//      |   <code after the AffineIfOp>      |
+//      |   <code after the AffineIfOp>  |
 //      +--------------------------------+
 //
 bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
@@ -469,7 +470,8 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
   Block *thenBlock = new Block();
   thenBlock->insertBefore(continueBlock);
 
-  // If the 'then' block is not empty, then splice the instructions.
+  // If the 'then' block is not empty, then splice the instructions except for
+  // the terminator.
   auto &oldThenBlocks = ifOp.getThenBlocks();
   if (!oldThenBlocks.empty()) {
     // We currently only handle one 'then' block.
@@ -478,9 +480,9 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
 
     Block *oldThen = &oldThenBlocks.front();
 
-    thenBlock->getOperations().splice(thenBlock->begin(),
-                                      oldThen->getOperations(),
-                                      oldThen->begin(), oldThen->end());
+    thenBlock->getOperations().splice(
+        thenBlock->begin(), oldThen->getOperations(), oldThen->begin(),
+        std::prev(oldThen->end()));
   }
 
   FuncBuilder builder(thenBlock);
@@ -499,9 +501,9 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
     elseBlock = new Block();
     elseBlock->insertBefore(continueBlock);
 
-    elseBlock->getOperations().splice(elseBlock->begin(),
-                                      oldElse->getOperations(),
-                                      oldElse->begin(), oldElse->end());
+    elseBlock->getOperations().splice(
+        elseBlock->begin(), oldElse->getOperations(), oldElse->begin(),
+        std::prev(oldElse->end()));
     builder.setInsertionPointToEnd(elseBlock);
     builder.create<BranchOp>(loc, continueBlock);
   }
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 9a7db193d29..2760e8b8bd3 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -145,8 +145,10 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
       }
     }
   }
-  // Move the loop body instructions to the loop's containing block.
+  // Move the loop body instructions, except for terminator, to the loop's
+  // containing block.
   auto *block = forInst->getBlock();
+  forOp.getBody()->getOperations().back().erase();
   block->getOperations().splice(Block::iterator(forInst),
                                 forOp.getBody()->getOperations());
   forOp.erase();
@@ -182,12 +184,12 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
   auto loopChunk =
       b->create<AffineForOp>(srcForInst.getLoc(), lbOperands, lbMap, ubOperands,
                              ubMap, srcForInst.getStep());
-  loopChunk.createBody();
   auto *loopChunkIV = loopChunk.getInductionVar();
   auto *srcIV = srcForInst.getInductionVar();
 
   BlockAndValueMapping operandMap;
 
+  FuncBuilder bodyBuilder = loopChunk.getBodyBuilder();
   for (auto it = instGroupQueue.begin() + offset, e = instGroupQueue.end();
        it != e; ++it) {
     uint64_t shift = it->first;
@@ -197,10 +199,9 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
     // Generate the remapping if the shift is not zero: remappedIV = newIV -
     // shift.
     if (!srcIV->use_empty() && shift != 0) {
-      FuncBuilder b(loopChunk.getBody());
-      auto ivRemap = b.create<AffineApplyOp>(
+      auto ivRemap = bodyBuilder.create<AffineApplyOp>(
           srcForInst.getLoc(),
-          b.getSingleDimShiftAffineMap(
+          bodyBuilder.getSingleDimShiftAffineMap(
               -static_cast<int64_t>(srcForInst.getStep() * shift)),
           loopChunkIV);
       operandMap.map(srcIV, ivRemap);
@@ -208,9 +209,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
       operandMap.map(srcIV, loopChunkIV);
     }
     for (auto *inst : insts) {
-      loopChunk.getBody()->push_back(inst->clone(operandMap, b->getContext()));
+      if (!inst->isa<AffineTerminatorOp>())
+        bodyBuilder.clone(*inst, operandMap);
     }
-  }
+  };
   if (succeeded(promoteIfSingleIteration(loopChunk)))
     return AffineForOp();
   return loopChunk;
@@ -233,7 +235,7 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
 // method.
 LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
                                  bool unrollPrologueEpilogue) {
-  if (forOp.getBody()->empty())
+  if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
     return success();
 
   // If the trip counts aren't constant, we would need versioning and
@@ -385,7 +387,8 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
   if (unrollFactor == 1)
     return promoteIfSingleIteration(forOp);
 
-  if (forOp.getBody()->empty())
+  if (forOp.getBody()->empty() ||
+      forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
     return failure();
 
   // Loops where the lower bound is a max expression isn't supported for
@@ -428,13 +431,13 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
   int64_t step = forOp.getStep();
   forOp.setStep(step * unrollFactor);
 
-  // Builder to insert unrolled bodies right after the last instruction in the
-  // body of 'forOp'.
-  FuncBuilder builder(forOp.getBody(), forOp.getBody()->end());
+  // Builder to insert unrolled bodies just before the terminator of the body of
+  // 'forOp'.
+  FuncBuilder builder = forOp.getBodyBuilder();
 
-  // Keep a pointer to the last instruction in the original block so that we
-  // know what to clone (since we are doing this in-place).
-  Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end());
+  // Keep a pointer to the last non-terminator instruction in the original block
+  // so that we know what to clone (since we are doing this in-place).
+  Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);
 
   // Unroll the contents of 'forOp' (append unrollFactor-1 additional copies).
   auto *forOpIV = forOp.getInductionVar();
@@ -465,23 +468,27 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
 }
 
 /// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
-/// nested within 'forOpA' as the only instruction in its block.
+/// nested within 'forOpA' as the only non-terminator operation in its block.
 void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
   auto *forOpAInst = forOpA.getOperation();
-  // 1) Slice forOpA's instruction list (which is just forOpB) just before
-  // forOpA (in forOpA's parent's block) this should leave 'forOpA's
-  // instruction list empty (because its perfectly nested).
+
   assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
-  forOpAInst->getBlock()->getOperations().splice(
-      Block::iterator(forOpAInst), forOpA.getBody()->getOperations());
-  // 2) Slice forOpB's instruction list into forOpA's instruction list (this
-  // leaves forOpB's instruction list empty).
-  forOpA.getBody()->getOperations().splice(forOpA.getBody()->begin(),
-                                           forOpB.getBody()->getOperations());
-  // 3) Slice forOpA into forOpB's instruction list.
-  forOpB.getBody()->getOperations().splice(
-      forOpB.getBody()->begin(), forOpAInst->getBlock()->getOperations(),
-      Block::iterator(forOpAInst));
+  auto &forOpABody = forOpA.getBody()->getOperations();
+  auto &forOpBBody = forOpB.getBody()->getOperations();
+
+  // 1) Splice forOpA's non-terminator operations (which is just forOpB) just
+  // before forOpA (in ForOpA's parent's block) this should leave 'forOpA's
+  // body containing only the terminator.
+  forOpAInst->getBlock()->getOperations().splice(Block::iterator(forOpAInst),
+                                                 forOpABody, forOpABody.begin(),
+                                                 std::prev(forOpABody.end()));
+  // 2) Splice forOpB's non-terminator operations into the beginning of forOpA's
+  // body (this leaves forOpB's body containing only the terminator).
+  forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
+                    std::prev(forOpBBody.end()));
+  // 3) Splice forOpA into the beginning of forOpB's body.
+  forOpBBody.splice(forOpBBody.begin(), forOpAInst->getBlock()->getOperations(),
+                    Block::iterator(forOpAInst));
 }
 
 /// Performs a series of loop interchanges to sink 'forOp' 'loopDepth' levels
@@ -516,25 +523,27 @@ static void augmentMapAndBounds(FuncBuilder *b, Value *iv, AffineMap *map,
 
 // Clone the original body of `forOp` into the body of `newForOp` while
 // substituting `oldIv` in place of
-// `forOp.getInductionVariable()`.
+// `forOp.getInductionVariable()` and ignoring the terminator.
 // Note: `newForOp` may be nested under `forOp`.
 static void cloneLoopBodyInto(AffineForOp forOp, Value *oldIv,
                               AffineForOp newForOp) {
   BlockAndValueMapping map;
   map.map(oldIv, newForOp.getInductionVar());
-  FuncBuilder b(newForOp.getBody(), newForOp.getBody()->end());
-  for (auto it = forOp.getBody()->begin(), end = forOp.getBody()->end();
-       it != end; ++it) {
+  FuncBuilder b = newForOp.getBodyBuilder();
+  for (auto &inst : *forOp.getBody()) {
     // Step over newForOp in case it is nested under forOp.
-    if (&*it == newForOp.getOperation()) {
+    if (&inst == newForOp.getOperation()) {
+      continue;
+    }
+    if (inst.isa<AffineTerminatorOp>()) {
       continue;
     }
-    auto *inst = b.clone(*it, map);
+    auto *instClone = b.clone(inst, map);
     unsigned idx = 0;
-    for (auto r : it->getResults()) {
+    for (auto r : inst.getResults()) {
       // Since we do a forward pass over the body, we iteratively augment
       // the `map` with everything we clone.
-      map.map(r, inst->getResult(idx++));
+      map.map(r, instClone->getResult(idx++));
     }
   }
 }
@@ -574,11 +583,10 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
 
   SmallVector<AffineForOp, 8> innerLoops;
   for (auto t : targets) {
-    // Insert newForOp at the end of `t`.
-    FuncBuilder b(t.getBody(), t.getBody()->end());
+    // Insert newForOp before the terminator of `t`.
+    FuncBuilder b = t.getBodyBuilder();
     auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap,
                                           ubOperands, ubMap, originalStep);
-    newForOp.createBody();
     cloneLoopBodyInto(t, forOp.getInductionVar(), newForOp);
     // Remove all instructions from `t` except `newForOp`.
     auto rit = ++newForOp.getOperation()->getReverseIterator();
author	Alex Zinenko <zinenko@google.com>	2019-03-27 05:11:58 -0700
committer	jpienaar <jpienaar@google.com>	2019-03-29 17:44:24 -0700
commit	5a5bba0279a5754c8e7aa2a9bf415aee2a0f1774 (patch)
tree	aa7553c11c35ffb030135528adb3bf47c54705cc /mlir/lib/Transforms
parent	af45236c70ed457fd093c88154a520db2d99f021 (diff)
download	bcm5719-llvm-5a5bba0279a5754c8e7aa2a9bf415aee2a0f1774.tar.gz bcm5719-llvm-5a5bba0279a5754c8e7aa2a9bf415aee2a0f1774.zip