| author | MLIR Team <no-reply@google.com> | 2019-02-06 11:01:10 -0800 |
|---|---|---|
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 16:17:15 -0700 |
| commit | b9dde91ea6ec449ecd4203fb06fdf726001a0c37 | |
| tree | e757b8d3cc5874995fc50b492123c2a0fcefd5a3 | |
| parent | 31f2b3ffa1a5f5233488e7d1415a7b7e2512a791 | |
Adds the ability to compute the MemRefRegion of a sliced loop nest, and uses this feature during loop fusion cost computation to compute what the write region of a fusion candidate loop nest slice would be (without having to materialize the slice or change the IR).

*) Adds a parameter to the public API of MemRefRegion::compute for passing in slice loop bounds, used to compute the memref region of the loop nest slice.
*) Exposes a public method, MemRefRegion::getRegionSize, for computing the size of the memref region in bytes.
PiperOrigin-RevId: 232706165
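
For orientation, this is roughly how the pieces added in this patch fit together — a minimal sketch against the APIs introduced below (the helper name, accesses, and depth are hypothetical, not part of the patch):

```cpp
#include "mlir/Analysis/Utils.h"

using namespace mlir;

// Hypothetical helper: estimate, in bytes, the write region a backward slice
// of the loop nest around 'srcOpInst' would have if inserted at
// 'dstLoopDepth' -- without materializing the slice or changing the IR.
Optional<int64_t> getSliceWriteRegionSize(Instruction *srcOpInst,
                                          const MemRefAccess &srcAccess,
                                          const MemRefAccess &dstAccess,
                                          unsigned dstLoopDepth) {
  // Compute slice loop bounds as functions of the dst loop IVs.
  ComputationSliceState sliceState;
  if (!getBackwardComputationSliceState(srcAccess, dstAccess, dstLoopDepth,
                                        &sliceState))
    return None;
  // New in this patch: pass the slice bounds into MemRefRegion::compute so
  // the region is constrained to the slice's iteration space.
  MemRefRegion sliceWriteRegion(srcOpInst->getLoc());
  if (!sliceWriteRegion.compute(srcOpInst, /*loopDepth=*/0, &sliceState))
    return None;
  // Also new: MemRefRegion::getRegionSize() returns the size in bytes.
  return sliceWriteRegion.getRegionSize();
}
```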
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | mlir/include/mlir/Analysis/Utils.h | 97 |
| -rw-r--r-- | mlir/include/mlir/IR/AffineStructures.h | 11 |
| -rw-r--r-- | mlir/lib/Analysis/Utils.cpp | 114 |
| -rw-r--r-- | mlir/lib/IR/AffineStructures.cpp | 60 |
| -rw-r--r-- | mlir/lib/Transforms/LoopFusion.cpp | 38 |
5 files changed, 230 insertions, 90 deletions
```diff
diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h
index 6cd688045c5..d6a67f65c7c 100644
--- a/mlir/include/mlir/Analysis/Utils.h
+++ b/mlir/include/mlir/Analysis/Utils.h
@@ -61,6 +61,45 @@ void getLoopIVs(const Instruction &inst,
 /// surrounding this instruction.
 unsigned getNestingDepth(const Instruction &stmt);
 
+/// ComputationSliceState aggregates loop bound AffineMaps and their associated
+/// operands for a set of loops within a loop nest (typically the set of loops
+/// surrounding a store operation). Loop bound AffineMaps which are non-null
+/// represent slices of that loop's iteration space.
+struct ComputationSliceState {
+  // List of lower bound AffineMaps.
+  SmallVector<AffineMap, 4> lbs;
+  // List of upper bound AffineMaps.
+  SmallVector<AffineMap, 4> ubs;
+  // List of lower bound operands (lbOperands[i] are used by 'lbs[i]').
+  std::vector<SmallVector<Value *, 4>> lbOperands;
+  // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
+  std::vector<SmallVector<Value *, 4>> ubOperands;
+};
+
+/// Computes computation slice loop bounds for the loop nest surrounding
+/// 'srcAccess', where the returned loop bound AffineMaps are functions of
+/// loop IVs from the loop nest surrounding 'dstAccess'.
+/// Returns true on success, false otherwise.
+bool getBackwardComputationSliceState(const MemRefAccess &srcAccess,
+                                      const MemRefAccess &dstAccess,
+                                      unsigned dstLoopDepth,
+                                      ComputationSliceState *sliceState);
+
+/// Creates a clone of the computation contained in the loop nest surrounding
+/// 'srcOpInst', slices the iteration space of src loop based on slice bounds
+/// in 'sliceState', and inserts the computation slice at the beginning of the
+/// instruction block of the loop at 'dstLoopDepth' in the loop nest surrounding
+/// 'dstOpInst'. Returns the top-level loop of the computation slice on
+/// success, returns nullptr otherwise.
+// Loop depth is a crucial optimization choice that determines where to
+// materialize the results of the backward slice - presenting a trade-off b/w
+// storage and redundant computation in several cases.
+// TODO(andydavis) Support computation slices with common surrounding loops.
+OpPointer<AffineForOp>
+insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
+                               unsigned dstLoopDepth,
+                               ComputationSliceState *sliceState);
+
 /// A region of a memref's data space; this is typically constructed by
 /// analyzing load/store op's on this memref and the index space of loops
 /// surrounding such op's.
@@ -86,7 +125,17 @@ struct MemRefRegion {
   /// symbolic identifiers which could include any of the loop IVs surrounding
   /// opInst up until 'loopDepth' and another additional Function symbols
   /// involved with the access (for eg., those appear in affine_apply's, loop
-  /// bounds, etc.).
+  /// bounds, etc.). If 'sliceState' is non-null, operands from 'sliceState'
+  /// are added as symbols, and the following constraints are added to the
+  /// system:
+  /// *) Inequality constraints which represent loop bounds for 'sliceState'
+  ///    operands which are loop IVs (these represent the destination loop IVs
+  ///    of the slice, and are added as symbols to MemRefRegion's constraint
+  ///    system).
+  /// *) Inequality constraints for the slice bounds in 'sliceState', which
+  ///    represent the bounds on the loop IVs in this constraint system w.r.t
+  ///    to slice operands (which correspond to symbols).
+  ///
   /// For example, the memref region for this operation at loopDepth = 1 will
   /// be:
   ///
@@ -99,7 +148,8 @@ struct MemRefRegion {
   ///   {memref = %A, write = false, {%i <= m0 <= %i + 7} }
   /// The last field is a 2-d FlatAffineConstraints symbolic in %i.
   ///
-  bool compute(Instruction *inst, unsigned loopDepth);
+  bool compute(Instruction *inst, unsigned loopDepth,
+               ComputationSliceState *sliceState = nullptr);
 
   FlatAffineConstraints *getConstraints() { return &cst; }
   const FlatAffineConstraints *getConstraints() const { return &cst; }
@@ -128,6 +178,9 @@ struct MemRefRegion {
     return cst.getConstantBoundOnDimSize(pos, lb);
   }
 
+  /// Returns the size of this MemRefRegion in bytes.
+  Optional<int64_t> getRegionSize();
+
   bool unionBoundingBox(const MemRefRegion &other);
 
   /// Returns the rank of the memref that this region corresponds to.
@@ -169,52 +222,12 @@ bool boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
 unsigned getNumCommonSurroundingLoops(const Instruction &A,
                                       const Instruction &B);
 
-/// ComputationSliceState aggregates loop bound AffineMaps and their associated
-/// operands for a set of loops within a loop nest (typically the set of loops
-/// surrounding a store operation). Loop bound AffineMaps which are non-null
-/// represent slices of that loop's iteration space.
-struct ComputationSliceState {
-  // List of lower bound AffineMaps.
-  SmallVector<AffineMap, 4> lbs;
-  // List of upper bound AffineMaps.
-  SmallVector<AffineMap, 4> ubs;
-  // List of lower bound operands (lbOperands[i] are used by 'lbs[i]').
-  std::vector<SmallVector<Value *, 4>> lbOperands;
-  // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
-  std::vector<SmallVector<Value *, 4>> ubOperands;
-};
-
-/// Computes computation slice loop bounds for the loop nest surrounding
-/// 'srcAccess', where the returned loop bound AffineMaps are functions of
-/// loop IVs from the loop nest surrounding 'dstAccess'.
-/// Returns true on success, false otherwise.
-bool getBackwardComputationSliceState(const MemRefAccess &srcAccess,
-                                      const MemRefAccess &dstAccess,
-                                      unsigned dstLoopDepth,
-                                      ComputationSliceState *sliceState);
-
-/// Creates a clone of the computation contained in the loop nest surrounding
-/// 'srcOpInst', slices the iteration space of src loop based on slice bounds
-/// in 'sliceState', and inserts the computation slice at the beginning of the
-/// instruction block of the loop at 'dstLoopDepth' in the loop nest surrounding
-/// 'dstOpInst'. Returns the top-level loop of the computation slice on
-/// success, returns nullptr otherwise.
-// Loop depth is a crucial optimization choice that determines where to
-// materialize the results of the backward slice - presenting a trade-off b/w
-// storage and redundant computation in several cases.
-// TODO(andydavis) Support computation slices with common surrounding loops.
-OpPointer<AffineForOp>
-insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
-                               unsigned dstLoopDepth,
-                               ComputationSliceState *sliceState);
-
 /// Gets the memory footprint of all data touched in the specified memory space
 /// in bytes; if the memory space is unspecified, considers all memory spaces.
 Optional<int64_t> getMemoryFootprintBytes(ConstOpPointer<AffineForOp> forOp,
                                           int memorySpace = -1);
 Optional<int64_t> getMemoryFootprintBytes(const Block &block,
                                           int memorySpace = -1);
-
 } // end namespace mlir
 
 #endif // MLIR_ANALYSIS_UTILS_H
```
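
To make the ComputationSliceState layout concrete (the struct is moved up in this header so that MemRefRegion::compute can take it): each sliced loop i pairs lbs[i]/ubs[i] with the operand lists lbOperands[i]/ubOperands[i]. A minimal sketch for a single-loop slice whose bounds are the maps (d0) -> (d0) and (d0) -> (d0 + 1) evaluated at a destination loop IV; lbMap, ubMap, and dstIV are placeholders supplied by a hypothetical caller:

```cpp
#include "mlir/Analysis/Utils.h"

using namespace mlir;

// Builds a ComputationSliceState for one sliced loop whose lower/upper bounds
// are 'lbMap'/'ubMap' (e.g. (d0) -> (d0) and (d0) -> (d0 + 1)), both
// evaluated at the destination loop IV 'dstIV'.
ComputationSliceState makeSingleLoopSlice(AffineMap lbMap, AffineMap ubMap,
                                          Value *dstIV) {
  ComputationSliceState sliceState;
  sliceState.lbs.push_back(lbMap);
  sliceState.ubs.push_back(ubMap);
  // Lower and upper bound maps share the same operand list here: the single
  // dim d0 of each map binds to the destination loop IV.
  sliceState.lbOperands.push_back(SmallVector<Value *, 4>{dstIV});
  sliceState.ubOperands.push_back(SmallVector<Value *, 4>{dstIV});
  return sliceState;
}
```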
```diff
diff --git a/mlir/include/mlir/IR/AffineStructures.h b/mlir/include/mlir/IR/AffineStructures.h
index 9c88436dcb0..2acee9a7f39 100644
--- a/mlir/include/mlir/IR/AffineStructures.h
+++ b/mlir/include/mlir/IR/AffineStructures.h
@@ -378,6 +378,17 @@ public:
                       SmallVectorImpl<AffineMap> *lbMaps,
                       SmallVectorImpl<AffineMap> *ubMaps);
 
+  /// Adds slice lower bounds represented by lower bounds in 'lbMaps' and upper
+  /// bounds in 'ubMaps' to the constraint system. Note that both lower/upper
+  /// bounds share the same operand list 'operands'.
+  /// This function assumes that position 'lbMaps.size' == 'ubMaps.size',
+  /// and that positions [0, lbMaps.size) represent dimensional identifiers
+  /// which correspond to the loop IVs whose iteration bounds are being sliced.
+  /// Note that both lower/upper bounds use operands from 'operands'.
+  /// Returns true on success, returns false for unimplemented cases.
+  bool addSliceBounds(ArrayRef<AffineMap> lbMaps, ArrayRef<AffineMap> ubMaps,
+                      ArrayRef<Value *> operands);
+
   // Adds an inequality (>= 0) from the coefficients specified in inEq.
   void addInequality(ArrayRef<int64_t> inEq);
   // Adds an equality from the coefficients specified in eq.
```
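
Calling the new declaration is straightforward once the sliced IVs occupy dim positions [0, lbMaps.size()) and the bound operands are already registered in the system — a sketch with a hypothetical wrapper name (compute() in Utils.cpp below does this bookkeeping for real):

```cpp
#include "mlir/Analysis/Utils.h"
#include "mlir/IR/AffineStructures.h"

using namespace mlir;

// Hypothetical wrapper: add a slice's bounds to an existing constraint
// system. Precondition (per the doc comment above): dims [0, lbs.size())
// are the sliced loop IVs, and every operand in lbOperands[0] has already
// been added to 'cst' as a dim or symbol.
bool addSliceToSystem(FlatAffineConstraints &cst,
                      const ComputationSliceState &sliceState) {
  return cst.addSliceBounds(sliceState.lbs, sliceState.ubs,
                            sliceState.lbOperands[0]);
}
```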
```diff
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index 2e753f8d10a..bdc5d19d0be 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -122,7 +122,8 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 //
 // TODO(bondhugula): extend this to any other memref dereferencing ops
 // (dma_start, dma_wait).
-bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
+bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth,
+                           ComputationSliceState *sliceState) {
   assert((inst->isa<LoadOp>() || inst->isa<StoreOp>()) &&
          "load/store op expected");
 
@@ -147,18 +148,33 @@ bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
   access.getAccessMap(&accessValueMap);
   AffineMap accessMap = accessValueMap.getAffineMap();
 
+  unsigned numDims = accessMap.getNumDims();
+  unsigned numSymbols = accessMap.getNumSymbols();
+  unsigned numOperands = accessValueMap.getNumOperands();
+  // Merge operands with slice operands.
+  SmallVector<Value *, 4> operands;
+  operands.resize(numOperands);
+  for (unsigned i = 0; i < numOperands; ++i)
+    operands[i] = accessValueMap.getOperand(i);
+
+  if (sliceState != nullptr) {
+    // Append slice operands to 'operands' as symbols.
+    operands.append(sliceState->lbOperands[0].begin(),
+                    sliceState->lbOperands[0].end());
+    // Update 'numSymbols' by operands from 'sliceState'.
+    numSymbols += sliceState->lbOperands[0].size();
+  }
+
   // We'll first associate the dims and symbols of the access map to the dims
   // and symbols resp. of cst. This will change below once cst is
   // fully constructed out.
-  cst.reset(accessMap.getNumDims(), accessMap.getNumSymbols(), 0,
-            accessValueMap.getOperands());
+  cst.reset(numDims, numSymbols, 0, operands);
 
   // Add equality constraints.
-  unsigned numDims = accessMap.getNumDims();
-  unsigned numSymbols = accessMap.getNumSymbols();
   // Add inequalties for loop lower/upper bounds.
   for (unsigned i = 0; i < numDims + numSymbols; ++i) {
-    if (auto loop = getForInductionVarOwner(accessValueMap.getOperand(i))) {
+    auto *operand = operands[i];
+    if (auto loop = getForInductionVarOwner(operand)) {
       // Note that cst can now have more dimensions than accessMap if the
       // bounds expressions involve outer loops or other symbols.
       // TODO(bondhugula): rewrite this to use getInstIndexSet; this way
@@ -167,7 +183,7 @@ bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
         return false;
     } else {
       // Has to be a valid symbol.
-      auto *symbol = accessValueMap.getOperand(i);
+      auto *symbol = operand;
       assert(isValidSymbol(symbol));
       // Check if the symbol is a constant.
       if (auto *inst = symbol->getDefiningInst()) {
@@ -178,6 +194,33 @@ bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
     }
   }
 
+  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
+  if (sliceState != nullptr) {
+    // Add dim and symbol slice operands.
+    for (const auto &operand : sliceState->lbOperands[0]) {
+      unsigned loc;
+      if (!cst.findId(*operand, &loc)) {
+        if (isValidSymbol(operand)) {
+          cst.addSymbolId(cst.getNumSymbolIds(), const_cast<Value *>(operand));
+          loc = cst.getNumDimIds() + cst.getNumSymbolIds() - 1;
+          // Check if the symbol is a constant.
+          if (auto *opInst = operand->getDefiningInst()) {
+            if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
+              cst.setIdToConstant(*operand, constOp->getValue());
+            }
+          }
+        } else {
+          cst.addDimId(cst.getNumDimIds(), const_cast<Value *>(operand));
+          loc = cst.getNumDimIds() - 1;
+        }
+      }
+    }
+    // Add upper/lower bounds from 'sliceState' to 'cst'.
+    if (!cst.addSliceBounds(sliceState->lbs, sliceState->ubs,
+                            sliceState->lbOperands[0]))
+      return false;
+  }
+
   // Add access function equalities to connect loop IVs to data dimensions.
   if (!cst.composeMap(&accessValueMap)) {
     LLVM_DEBUG(llvm::dbgs() << "getMemRefRegion: compose affine map failed\n");
@@ -233,6 +276,32 @@ static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
   return llvm::divideCeil(sizeInBits, 8);
 }
 
+// Returns the size of the region.
+Optional<int64_t> MemRefRegion::getRegionSize() {
+  auto memRefType = memref->getType().cast<MemRefType>();
+
+  auto layoutMaps = memRefType.getAffineMaps();
+  if (layoutMaps.size() > 1 ||
+      (layoutMaps.size() == 1 && !layoutMaps[0].isIdentity())) {
+    LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
+    return false;
+  }
+
+  // Indices to use for the DmaStart op.
+  // Indices for the original memref being DMAed from/to.
+  SmallVector<Value *, 4> memIndices;
+  // Indices for the faster buffer being DMAed into/from.
+  SmallVector<Value *, 4> bufIndices;
+
+  // Compute the extents of the buffer.
+  Optional<int64_t> numElements = getConstantBoundingSizeAndShape();
+  if (!numElements.hasValue()) {
+    LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
+    return None;
+  }
+  return getMemRefEltSizeInBytes(memRefType) * numElements.getValue();
+}
+
 /// Returns the size of memref data in bytes if it's statically shaped, None
 /// otherwise. If the element of the memref has vector type, takes into account
 /// size of the vector as well.
@@ -420,8 +489,6 @@ bool mlir::getBackwardComputationSliceState(const MemRefAccess &srcAccess,
 // entire destination index set. Subtract out the dependent destination
 // iterations from destination index set and check for emptiness --- this is one
 // solution.
-// TODO(andydavis) Remove dependence on 'srcLoopDepth' here. Instead project
-// out loop IVs we don't care about and produce smaller slice.
 OpPointer<AffineForOp> mlir::insertBackwardComputationSlice(
     Instruction *srcOpInst, Instruction *dstOpInst, unsigned dstLoopDepth,
     ComputationSliceState *sliceState) {
@@ -537,33 +604,6 @@ unsigned mlir::getNumCommonSurroundingLoops(const Instruction &A,
   return numCommonLoops;
 }
 
-// Returns the size of the region.
-static Optional<int64_t> getRegionSize(const MemRefRegion &region) {
-  auto *memref = region.memref;
-  auto memRefType = memref->getType().cast<MemRefType>();
-
-  auto layoutMaps = memRefType.getAffineMaps();
-  if (layoutMaps.size() > 1 ||
-      (layoutMaps.size() == 1 && !layoutMaps[0].isIdentity())) {
-    LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
-    return false;
-  }
-
-  // Indices to use for the DmaStart op.
-  // Indices for the original memref being DMAed from/to.
-  SmallVector<Value *, 4> memIndices;
-  // Indices for the faster buffer being DMAed into/from.
-  SmallVector<Value *, 4> bufIndices;
-
-  // Compute the extents of the buffer.
-  Optional<int64_t> numElements = region.getConstantBoundingSizeAndShape();
-  if (!numElements.hasValue()) {
-    LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
-    return None;
-  }
-  return getMemRefEltSizeInBytes(memRefType) * numElements.getValue();
-}
-
 Optional<int64_t>
 mlir::getMemoryFootprintBytes(ConstOpPointer<AffineForOp> forOp,
                               int memorySpace) {
@@ -601,7 +641,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block,
 
   int64_t totalSizeInBytes = 0;
   for (const auto &region : regions) {
-    auto size = getRegionSize(*region);
+    auto size = region->getRegionSize();
     if (!size.hasValue())
       return None;
    totalSizeInBytes += size.getValue();
```
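
The size computation at the bottom of getRegionSize() (hoisted here from the old static helper in this file) is just bounding-box volume times element size. A standalone, self-contained illustration with made-up numbers — a region of a memref<100x50xf32> spanning 8 x 50 elements:

```cpp
#include <cstdint>
#include <iostream>

// Illustrates the arithmetic of MemRefRegion::getRegionSize() for a
// hypothetical region {%i <= d0 <= %i + 7, 0 <= d1 <= 49} of a
// memref<100x50xf32>: constant bounding-box extents 8 x 50, f32 = 4 bytes.
int main() {
  int64_t extents[] = {8, 50}; // constant bounding size per dimension
  int64_t eltSizeInBytes = 4;  // f32
  int64_t numElements = 1;
  for (int64_t e : extents)
    numElements *= e;
  std::cout << numElements * eltSizeInBytes << " bytes\n"; // 1600 bytes
  return 0;
}
```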
```diff
diff --git a/mlir/lib/IR/AffineStructures.cpp b/mlir/lib/IR/AffineStructures.cpp
index 1306199dc79..d92b1b60182 100644
--- a/mlir/lib/IR/AffineStructures.cpp
+++ b/mlir/lib/IR/AffineStructures.cpp
@@ -1129,6 +1129,66 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
   }
 }
 
+// Adds slice lower/upper bounds from 'lbMaps'/'upMaps' to the constraint
+// system. This function assumes that position 'lbMaps.size' == 'ubMaps.size',
+// and that positions [0, lbMaps.size) represent dimensional identifiers which
+// correspond to the loop IVs whose iteration bounds are being sliced.
+// Note that both lower/upper bounds use operands from 'operands'.
+// Returns true on success. Returns false for unimplemented cases such as
+// semi-affine expressions or expressions with mod/floordiv.
+bool FlatAffineConstraints::addSliceBounds(ArrayRef<AffineMap> lbMaps,
+                                           ArrayRef<AffineMap> ubMaps,
+                                           ArrayRef<Value *> operands) {
+  assert(lbMaps.size() == ubMaps.size());
+  // Record positions of the operands in the constraint system.
+  SmallVector<unsigned, 8> positions;
+  for (const auto &operand : operands) {
+    unsigned loc;
+    if (!findId(*operand, &loc))
+      assert(0 && "expected to be found");
+    positions.push_back(loc);
+  }
+
+  auto addLowerOrUpperBound = [&](unsigned pos, AffineMap boundMap,
+                                  bool lower) -> bool {
+    FlatAffineConstraints localVarCst;
+    std::vector<SmallVector<int64_t, 8>> flatExprs;
+    if (!getFlattenedAffineExprs(boundMap, &flatExprs, &localVarCst)) {
+      LLVM_DEBUG(llvm::dbgs() << "semi-affine expressions not yet supported\n");
+      return false;
+    }
+    if (localVarCst.getNumLocalIds() > 0) {
+      LLVM_DEBUG(llvm::dbgs()
+                 << "loop bounds with mod/floordiv expr's not yet supported\n");
+      return false;
+    }
+
+    for (const auto &flatExpr : flatExprs) {
+      SmallVector<int64_t, 4> ineq(getNumCols(), 0);
+      ineq[pos] = lower ? 1 : -1;
+      for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) {
+        ineq[positions[j]] = lower ? -flatExpr[j] : flatExpr[j];
+      }
+      // Constant term.
+      ineq[getNumCols() - 1] =
+          lower ? -flatExpr[flatExpr.size() - 1]
+                // Upper bound in flattenedExpr is an exclusive one.
+                : flatExpr[flatExpr.size() - 1] - 1;
+      addInequality(ineq);
+    }
+    return true;
+  };
+
+  for (unsigned i = 0, e = lbMaps.size(); i < e; ++i) {
+    if (!addLowerOrUpperBound(i, lbMaps[i], /*lower=*/true))
+      return false;
+    if (!addLowerOrUpperBound(i, ubMaps[i], /*lower=*/false))
+      return false;
+  }
+
+  return true;
+}
+
 void FlatAffineConstraints::addEquality(ArrayRef<int64_t> eq) {
   assert(eq.size() == getNumCols());
   unsigned offset = equalities.size();
```
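
The coefficient layout addLowerOrUpperBound() builds is worth spelling out. For a sliced IV at column pos with one bound operand: a lower bound map (d0) -> (d0) flattens to {1, 0} and yields x_pos - x_op >= 0, while an upper bound map (d0) -> (d0 + 8) flattens to {1, 8} and, because flattened upper bounds are exclusive, yields -x_pos + x_op + 7 >= 0 (i.e. x_pos <= x_op + 7). A standalone sketch of just that arithmetic, over a hypothetical three-column system [x_pos, x_op, const]:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Builds one inequality row the way addLowerOrUpperBound() does, for a
// single-operand bound map. 'flatExpr' holds {operand coefficient, constant}.
std::vector<int64_t> makeIneq(unsigned pos, unsigned operandCol,
                              const std::vector<int64_t> &flatExpr,
                              bool lower) {
  std::vector<int64_t> ineq(3, 0);
  ineq[pos] = lower ? 1 : -1;
  ineq[operandCol] = lower ? -flatExpr[0] : flatExpr[0];
  // Upper bounds in the flattened form are exclusive, hence the -1.
  ineq[2] = lower ? -flatExpr[1] : flatExpr[1] - 1;
  return ineq;
}

int main() {
  // Lower bound (d0) -> (d0):     x_pos - x_op >= 0, i.e. x_pos >= x_op.
  auto lb = makeIneq(0, 1, {1, 0}, /*lower=*/true); // {1, -1, 0}
  // Upper bound (d0) -> (d0 + 8): -x_pos + x_op + 7 >= 0, i.e. x_pos <= x_op + 7.
  auto ub = makeIneq(0, 1, {1, 8}, /*lower=*/false); // {-1, 1, 7}
  std::cout << lb[0] << " " << lb[1] << " " << lb[2] << "\n";
  std::cout << ub[0] << " " << ub[1] << " " << ub[2] << "\n";
  return 0;
}
```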
```diff
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 3e0bf046ddf..8d5f51059bf 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -1118,12 +1118,23 @@ static bool isFusionProfitable(Instruction *srcOpInst,
                      /*tripCountOverrideMap=*/nullptr,
                      /*computeCostMap=*/nullptr);
 
+  // Compute src loop nest write region size.
+  MemRefRegion srcWriteRegion(srcOpInst->getLoc());
+  srcWriteRegion.compute(srcOpInst, /*loopDepth=*/0);
+  Optional<int64_t> maybeSrcWriteRegionSizeBytes =
+      srcWriteRegion.getRegionSize();
+  if (!maybeSrcWriteRegionSizeBytes.hasValue())
+    return false;
+  int64_t srcWriteRegionSizeBytes = maybeSrcWriteRegionSizeBytes.getValue();
+
   // Compute op instance count for the src loop nest.
   uint64_t dstLoopNestCost =
       getComputeCost(dstLoopIVs[0]->getInstruction(), &dstLoopNestStats,
                      /*tripCountOverrideMap=*/nullptr,
                      /*computeCostMap=*/nullptr);
 
+  // Evaluate all depth choices for materializing the slice in the destination
+  // loop nest.
   llvm::SmallDenseMap<Instruction *, uint64_t, 8> sliceTripCountMap;
   DenseMap<Instruction *, int64_t> computeCostMap;
   for (unsigned i = maxDstLoopDepth; i >= 1; --i) {
@@ -1187,11 +1198,21 @@ static bool isFusionProfitable(Instruction *srcOpInst,
         (static_cast<double>(srcLoopNestCost) + dstLoopNestCost) -
         1;
 
-    // TODO(bondhugula): This is an ugly approximation. Fix this by finding a
-    // good way to calculate the footprint of the memref in the slice and
-    // divide it by the total memory footprint of the fused computation.
-    double storageReduction =
-        static_cast<double>(srcLoopNestCost) / sliceIterationCount;
+    // Compute what the slice write MemRefRegion would be, if the src loop
+    // nest slice 'sliceStates[i - 1]' were to be inserted into the dst loop
+    // nest at loop depth 'i'
+    MemRefRegion sliceWriteRegion(srcOpInst->getLoc());
+    sliceWriteRegion.compute(srcOpInst, /*loopDepth=*/0, &sliceStates[i - 1]);
+    Optional<int64_t> maybeSliceWriteRegionSizeBytes =
+        sliceWriteRegion.getRegionSize();
+    if (!maybeSliceWriteRegionSizeBytes.hasValue() ||
+        maybeSliceWriteRegionSizeBytes.getValue() == 0)
+      continue;
+    int64_t sliceWriteRegionSizeBytes =
+        maybeSliceWriteRegionSizeBytes.getValue();
+
+    double storageReduction = static_cast<double>(srcWriteRegionSizeBytes) /
+                              static_cast<double>(sliceWriteRegionSizeBytes);
 
     LLVM_DEBUG({
       std::stringstream msg;
@@ -1219,12 +1240,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
       maxStorageReduction = storageReduction;
      bestDstLoopDepth = i;
       minFusedLoopNestComputeCost = fusedLoopNestComputeCost;
-      // TODO(bondhugula,andydavis): find a good way to compute the memory
-      // footprint of the materialized slice.
-      // Approximating this to the compute cost of the slice. This could be an
-      // under-approximation or an overapproximation, but in many cases
-      // accurate.
-      sliceMemEstimate = sliceIterationCount;
+      sliceMemEstimate = sliceWriteRegionSizeBytes;
     }
   }
```
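
Net effect on the cost model: storageReduction now compares bytes written by the full source nest against bytes the slice would write at each candidate depth, replacing the old compute-cost proxy. A standalone numeric illustration, with hypothetical values:

```cpp
#include <iostream>

// Illustrates the new storage-reduction metric: the ratio of the source
// nest's full write-region size to the slice's write-region size at the
// candidate insertion depth. The byte counts below are made up.
int main() {
  long srcWriteRegionSizeBytes = 40000; // full src loop nest write region
  long sliceWriteRegionSizeBytes = 400; // slice inserted at depth i
  double storageReduction =
      static_cast<double>(srcWriteRegionSizeBytes) / sliceWriteRegionSizeBytes;
  std::cout << "storage reduction: " << storageReduction << "x\n"; // 100x
  return 0;
}
```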

