| author | Uday Bondhugula <bondhugula@google.com> | 2018-10-12 14:54:54 -0700 |
|---|---|---|
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 13:29:21 -0700 |
| commit | 86eac4618c06a54c1f6d95a8c9d94b15dda5e35b (patch) | |
| tree | a6574eb104b867e24814319c44b26d953527f4c2 /mlir/lib/Transforms/PipelineDataTransfer.cpp | |
| parent | 9e3b928e32285bf47d366d5685d2c65e616544cb (diff) | |
Create private exclusive / single use affine computation slice for an op stmt.
- add a utility to create a private / exclusive / single-use affine
computation slice for an op stmt (see the method doc comment); a single
multi-result affine_apply op is prepended to the op stmt to provide all
results needed for its operands as a function of loop iterators and symbols.
- use it for DMA pipelining (to create private slices for DMA start stmt's; see the usage sketch below);
resolve TODOs / feature request (b/117159533)
- move createComposedAffineApplyOp to Transforms/Utils; decouple it from
taking a memref as input and generalize it.
PiperOrigin-RevId: 216926818
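
For concreteness, here is the core of how the pipelining pass uses the new utility, adapted from the diff below; `opStmt` is the DMA start's OperationStmt and `opDelayMap` holds the per-statement shifts, both taken from the surrounding pass code:

```cpp
// Try to give the DMA start stmt a private, single-use affine computation
// slice: a single multi-result affine_apply prepended right before it that
// exclusively feeds its operands, so the slice can be shifted with it.
if (auto *slice = mlir::createAffineComputationSlice(opStmt)) {
  opDelayMap[slice] = 0;  // The private slice stays unshifted, like its DMA.
} else {
  // No slice was created: pin the affine_apply op's reachable from the
  // operands to shift 0 instead.
  SmallVector<OperationStmt *, 4> affineApplyStmts;
  SmallVector<MLValue *, 4> operands(opStmt->getOperands());
  getReachableAffineApplyOps(operands, affineApplyStmts);
  for (auto *op : affineApplyStmts)
    opDelayMap[op] = 0;
}
```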
Diffstat (limited to 'mlir/lib/Transforms/PipelineDataTransfer.cpp')
| -rw-r--r-- | mlir/lib/Transforms/PipelineDataTransfer.cpp | 96 |
1 file changed, 31 insertions(+), 65 deletions(-)
```diff
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index dd8b9a7615c..bb60d8e9d78 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -21,7 +21,7 @@
 #include "mlir/Transforms/Passes.h"
 
-#include "mlir/IR/AffineExpr.h"
+#include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/StandardOps/StandardOps.h"
@@ -94,8 +94,7 @@ static bool doubleBuffer(MLValue *oldMemRef, ForStmt *forStmt) {
   auto *newMemRefType = doubleShape(cast<MemRefType>(oldMemRef->getType()));
 
   // Create and place the alloc at the top level.
-  auto *func = forStmt->getFunction();
-  MLFuncBuilder topBuilder(func, func->begin());
+  MLFuncBuilder topBuilder(forStmt->getFunction());
   auto *newMemRef = cast<MLValue>(
       topBuilder.create<AllocOp>(forStmt->getLoc(), newMemRefType)
           ->getResult());
@@ -105,13 +104,9 @@ static bool doubleBuffer(MLValue *oldMemRef, ForStmt *forStmt) {
       bInner.getAffineMap(/*dimCount=*/1, /*symbolCount=*/0, {d0 % 2}, {});
   auto ivModTwoOp =
       bInner.create<AffineApplyOp>(forStmt->getLoc(), modTwoMap, forStmt);
-  if (!replaceAllMemRefUsesWith(oldMemRef, newMemRef, ivModTwoOp->getResult(0)))
+  if (!replaceAllMemRefUsesWith(oldMemRef, newMemRef,
+                                cast<MLValue>(ivModTwoOp->getResult(0))))
     return false;
-  // We don't need ivMod2Op any more - this is cloned by
-  // replaceAllMemRefUsesWith wherever the memref replacement happens. Once
-  // b/117159533 is addressed, we'll eventually only need to pass
-  // ivModTwoOp->getResult(0) to replaceAllMemRefUsesWith.
-  cast<OperationStmt>(ivModTwoOp->getOperation())->eraseFromBlock();
   return true;
 }
@@ -169,16 +164,18 @@ PassResult PipelineDataTransfer::runOnMLFunction(MLFunction *f) {
   for (auto *dmaStartStmt : dmaStartStmts) {
     MLValue *oldMemRef = cast<MLValue>(dmaStartStmt->getOperand(
         getHigherMemRefPos(dmaStartStmt->getAs<DmaStartOp>())));
-    if (!doubleBuffer(oldMemRef, forStmt))
+    if (!doubleBuffer(oldMemRef, forStmt)) {
       return PassResult::Failure;
+    }
   }
 
   // Double the buffers for tag memref's.
   for (auto *dmaFinishStmt : dmaFinishStmts) {
     MLValue *oldTagMemRef = cast<MLValue>(
         dmaFinishStmt->getOperand(getTagMemRefPos(*dmaFinishStmt)));
-    if (!doubleBuffer(oldTagMemRef, forStmt))
+    if (!doubleBuffer(oldTagMemRef, forStmt)) {
       return PassResult::Failure;
+    }
   }
 
   // Collect all compute ops.
@@ -186,75 +183,43 @@ PassResult PipelineDataTransfer::runOnMLFunction(MLFunction *f) {
   computeOps.reserve(forStmt->getStatements().size());
   // Store delay for statement for later lookup for AffineApplyOp's.
   DenseMap<const Statement *, unsigned> opDelayMap;
-  for (const auto &stmt : *forStmt) {
+  for (auto &stmt : *forStmt) {
     auto *opStmt = dyn_cast<OperationStmt>(&stmt);
     if (!opStmt) {
       // All for and if stmt's are treated as pure compute operations.
-      // TODO(bondhugula): check whether such statements do not have any DMAs
-      // nested within.
       opDelayMap[&stmt] = 1;
     } else if (opStmt->is<DmaStartOp>()) {
       // DMA starts are not shifted.
-      opDelayMap[&stmt] = 0;
+      opDelayMap[opStmt] = 0;
+      // Set shifts for DMA start stmt's affine operand computation slices to 0.
+      if (auto *slice = mlir::createAffineComputationSlice(opStmt)) {
+        opDelayMap[slice] = 0;
+      } else {
+        // If a slice wasn't created, the reachable affine_apply op's from its
+        // operands are the ones that go with it.
+        SmallVector<OperationStmt *, 4> affineApplyStmts;
+        SmallVector<MLValue *, 4> operands(opStmt->getOperands());
+        getReachableAffineApplyOps(operands, affineApplyStmts);
+        for (auto *op : affineApplyStmts) {
+          opDelayMap[op] = 0;
+        }
+      }
     } else if (opStmt->is<DmaWaitOp>()) {
       // DMA finish op shifted by one.
-      opDelayMap[&stmt] = 1;
-    } else if (!opStmt->is<AffineApplyOp>()) {
-      // Compute op shifted by one.
-      opDelayMap[&stmt] = 1;
+      opDelayMap[opStmt] = 1;
+    } else {
+      // Everything else is a compute op; so shifted by one (op's supplying
+      // 'affine' operands to DMA start's have already been set right shifts.
+      opDelayMap[opStmt] = 1;
       computeOps.push_back(&stmt);
     }
-    // Shifts for affine apply op's determined later.
-  }
-
-  // Get the ancestor of a 'stmt' that lies in forStmt's block.
-  auto getAncestorInForBlock =
-      [&](const Statement *stmt, const StmtBlock &block) -> const Statement * {
-    // Traverse up the statement hierarchy starting from the owner of operand
-    // to find the ancestor statement that resides in the block of 'forStmt'.
-    while (stmt != nullptr && stmt->getBlock() != &block) {
-      stmt = stmt->getParentStmt();
-    }
-    return stmt;
-  };
-
-  // Determine delays for affine apply op's: look up delay from its consumer
-  // op. This code will be thrown away once we have a way to obtain indices
-  // through a composed affine_apply op. See TODO(b/117159533). Such a composed
-  // affine_apply will be used exclusively by a given memref deferencing op.
-  for (const auto &stmt : *forStmt) {
-    auto *opStmt = dyn_cast<OperationStmt>(&stmt);
-    // Skip statements that aren't affine apply ops.
-    if (!opStmt || !opStmt->is<AffineApplyOp>())
-      continue;
-    // Traverse uses of each result of the affine apply op.
-    for (auto *res : opStmt->getResults()) {
-      for (auto &use : res->getUses()) {
-        auto *ancestorInForBlock =
-            getAncestorInForBlock(use.getOwner(), *forStmt);
-        assert(ancestorInForBlock &&
-               "traversing parent should reach forStmt block");
-        auto *opCheck = dyn_cast<OperationStmt>(ancestorInForBlock);
-        if (!opCheck || opCheck->is<AffineApplyOp>())
-          continue;
-        assert(opDelayMap.find(ancestorInForBlock) != opDelayMap.end());
-        if (opDelayMap.find(&stmt) != opDelayMap.end()) {
-          // This is where we enforce all uses of this affine_apply to have
-          // the same shifts - so that we know what shift to use for the
-          // affine_apply to preserve semantics.
-          assert(opDelayMap[&stmt] == opDelayMap[ancestorInForBlock]);
-        } else {
-          // Obtain delay from its consumer.
-          opDelayMap[&stmt] = opDelayMap[ancestorInForBlock];
-        }
-      }
-    }
   }
 
   // Get delays stored in map.
   std::vector<uint64_t> delays(forStmt->getStatements().size());
   unsigned s = 0;
   for (const auto &stmt : *forStmt) {
+    assert(opDelayMap.find(&stmt) != opDelayMap.end());
     delays[s++] = opDelayMap[&stmt];
   }
 
@@ -263,8 +228,9 @@ PassResult PipelineDataTransfer::runOnMLFunction(MLFunction *f) {
     return PassResult::Failure;
   }
 
-  if (stmtBodySkew(forStmt, delays))
+  if (stmtBodySkew(forStmt, delays)) {
     return PassResult::Failure;
+  }
 
   return PassResult::Success;
 }
```
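
For intuition about what the pass's schedule achieves once buffers are doubled (`iv mod 2` indexing) and DMA waits and compute are shifted by one iteration, here is a minimal stand-alone C++ sketch. It is an illustration only: `dma_start`, `dma_wait`, and `compute` are hypothetical synchronous stand-ins for the asynchronous DMA and compute ops the pass actually schedules, and `tileSize <= 1024` is assumed.

```cpp
#include <cstddef>
#include <cstring>

// Hypothetical stand-ins for dma.start / dma.wait and the loop's compute op.
// The real ops are asynchronous; dma_wait() would complete the oldest
// outstanding transfer. Here they run synchronously so the sketch compiles.
static void dma_start(const float *src, float *dst, std::size_t n) {
  std::memcpy(dst, src, n * sizeof(float));
}
static void dma_wait() {}
static float compute(const float *buf, std::size_t n) {
  float sum = 0;
  for (std::size_t i = 0; i < n; ++i)
    sum += buf[i];
  return sum;
}

// The pipelined schedule: the transfer for tile i+1 is issued before the
// compute on tile i, so on real hardware the two overlap. buf's leading
// dimension is doubled and indexed by i % 2, mirroring the pass's d0 % 2 map.
float pipelined(const float *src, std::size_t numTiles, std::size_t tileSize) {
  if (numTiles == 0)
    return 0;
  float buf[2][1024];
  float acc = 0;
  dma_start(src, buf[0], tileSize);            // prologue: fetch tile 0
  for (std::size_t i = 0; i < numTiles; ++i) {
    if (i + 1 < numTiles)                      // dma.start keeps shift 0
      dma_start(src + (i + 1) * tileSize, buf[(i + 1) % 2], tileSize);
    dma_wait();                                // dma.wait shifted by one: tile i
    acc += compute(buf[i % 2], tileSize);      // compute shifted by one
  }
  return acc;
}
```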

