author     MLIR Team <no-reply@google.com>    2019-02-06 11:01:10 -0800
committer  jpienaar <jpienaar@google.com>     2019-03-29 16:17:15 -0700
commit     b9dde91ea6ec449ecd4203fb06fdf726001a0c37 (patch)
tree       e757b8d3cc5874995fc50b492123c2a0fcefd5a3
parent     31f2b3ffa1a5f5233488e7d1415a7b7e2512a791 (diff)
Adds the ability to compute the MemRefRegion of a sliced loop nest. Utilizes this feature during loop fusion cost computation, to compute what the write region of a fusion candidate loop nest slice would be (without having to materialize the slice or change the IR).
*) Adds a parameter to the public API of MemRefRegion::compute for passing in the slice loop bounds, to compute the memref region of the loop nest slice.
*) Exposes the public method MemRefRegion::getRegionSize for computing the size of the memref region in bytes.

PiperOrigin-RevId: 232706165
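As a rough sketch of how the new pieces compose (the helper name and wrapper below are hypothetical; the ComputationSliceState, MemRefRegion::compute, and getRegionSize calls mirror the LoopFusion.cpp changes in this patch):

// Hypothetical helper, not commit code: size in bytes of the region a fusion
// candidate's slice would write, computed without materializing the slice.
static Optional<int64_t>
getSliceWriteSizeInBytes(Instruction *srcStoreOpInst,
                         ComputationSliceState *sliceState) {
  MemRefRegion sliceWriteRegion(srcStoreOpInst->getLoc());
  // Passing 'sliceState' bounds the region by the slice loop bounds instead of
  // the full source loop nest.
  if (!sliceWriteRegion.compute(srcStoreOpInst, /*loopDepth=*/0, sliceState))
    return None;
  // None if the memref has a dynamic shape or a non-identity layout map.
  return sliceWriteRegion.getRegionSize();
}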
-rw-r--r--  mlir/include/mlir/Analysis/Utils.h       |  97
-rw-r--r--  mlir/include/mlir/IR/AffineStructures.h  |  11
-rw-r--r--  mlir/lib/Analysis/Utils.cpp              | 114
-rw-r--r--  mlir/lib/IR/AffineStructures.cpp         |  60
-rw-r--r--  mlir/lib/Transforms/LoopFusion.cpp       |  38
5 files changed, 230 insertions(+), 90 deletions(-)
diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h
index 6cd688045c5..d6a67f65c7c 100644
--- a/mlir/include/mlir/Analysis/Utils.h
+++ b/mlir/include/mlir/Analysis/Utils.h
@@ -61,6 +61,45 @@ void getLoopIVs(const Instruction &inst,
/// surrounding this instruction.
unsigned getNestingDepth(const Instruction &stmt);
+/// ComputationSliceState aggregates loop bound AffineMaps and their associated
+/// operands for a set of loops within a loop nest (typically the set of loops
+/// surrounding a store operation). Loop bound AffineMaps which are non-null
+/// represent slices of that loop's iteration space.
+struct ComputationSliceState {
+ // List of lower bound AffineMaps.
+ SmallVector<AffineMap, 4> lbs;
+ // List of upper bound AffineMaps.
+ SmallVector<AffineMap, 4> ubs;
+ // List of lower bound operands (lbOperands[i] are used by 'lbs[i]').
+ std::vector<SmallVector<Value *, 4>> lbOperands;
+ // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
+ std::vector<SmallVector<Value *, 4>> ubOperands;
+};
+
+/// Computes computation slice loop bounds for the loop nest surrounding
+/// 'srcAccess', where the returned loop bound AffineMaps are functions of
+/// loop IVs from the loop nest surrounding 'dstAccess'.
+/// Returns true on success, false otherwise.
+bool getBackwardComputationSliceState(const MemRefAccess &srcAccess,
+ const MemRefAccess &dstAccess,
+ unsigned dstLoopDepth,
+ ComputationSliceState *sliceState);
+
+/// Creates a clone of the computation contained in the loop nest surrounding
+/// 'srcOpInst', slices the iteration space of src loop based on slice bounds
+/// in 'sliceState', and inserts the computation slice at the beginning of the
+/// instruction block of the loop at 'dstLoopDepth' in the loop nest surrounding
+/// 'dstOpInst'. Returns the top-level loop of the computation slice on
+/// success, returns nullptr otherwise.
+// Loop depth is a crucial optimization choice that determines where to
+// materialize the results of the backward slice, presenting a trade-off
+// between storage and redundant computation in several cases.
+// TODO(andydavis) Support computation slices with common surrounding loops.
+OpPointer<AffineForOp>
+insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
+ unsigned dstLoopDepth,
+ ComputationSliceState *sliceState);
+
/// A region of a memref's data space; this is typically constructed by
/// analyzing load/store op's on this memref and the index space of loops
/// surrounding such op's.
@@ -86,7 +125,17 @@ struct MemRefRegion {
/// symbolic identifiers which could include any of the loop IVs surrounding
/// opInst up until 'loopDepth' and another additional Function symbols
/// involved with the access (for eg., those appear in affine_apply's, loop
- /// bounds, etc.).
+ /// bounds, etc.). If 'sliceState' is non-null, operands from 'sliceState'
+ /// are added as symbols, and the following constraints are added to the
+ /// system:
+ /// *) Inequality constraints which represent loop bounds for 'sliceState'
+ /// operands which are loop IVs (these represent the destination loop IVs
+ /// of the slice, and are added as symbols to MemRefRegion's constraint
+ /// system).
+ /// *) Inequality constraints for the slice bounds in 'sliceState', which
+ /// represent the bounds on the loop IVs in this constraint system w.r.t.
+ /// the slice operands (which correspond to symbols).
+ ///
/// For example, the memref region for this operation at loopDepth = 1 will
/// be:
///
@@ -99,7 +148,8 @@ struct MemRefRegion {
/// {memref = %A, write = false, {%i <= m0 <= %i + 7} }
/// The last field is a 2-d FlatAffineConstraints symbolic in %i.
///
- bool compute(Instruction *inst, unsigned loopDepth);
+ bool compute(Instruction *inst, unsigned loopDepth,
+ ComputationSliceState *sliceState = nullptr);
FlatAffineConstraints *getConstraints() { return &cst; }
const FlatAffineConstraints *getConstraints() const { return &cst; }
@@ -128,6 +178,9 @@ struct MemRefRegion {
return cst.getConstantBoundOnDimSize(pos, lb);
}
+ /// Returns the size of this MemRefRegion in bytes.
+ Optional<int64_t> getRegionSize();
+
bool unionBoundingBox(const MemRefRegion &other);
/// Returns the rank of the memref that this region corresponds to.
@@ -169,52 +222,12 @@ bool boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
unsigned getNumCommonSurroundingLoops(const Instruction &A,
const Instruction &B);
-/// ComputationSliceState aggregates loop bound AffineMaps and their associated
-/// operands for a set of loops within a loop nest (typically the set of loops
-/// surrounding a store operation). Loop bound AffineMaps which are non-null
-/// represent slices of that loop's iteration space.
-struct ComputationSliceState {
- // List of lower bound AffineMaps.
- SmallVector<AffineMap, 4> lbs;
- // List of upper bound AffineMaps.
- SmallVector<AffineMap, 4> ubs;
- // List of lower bound operands (lbOperands[i] are used by 'lbs[i]').
- std::vector<SmallVector<Value *, 4>> lbOperands;
- // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
- std::vector<SmallVector<Value *, 4>> ubOperands;
-};
-
-/// Computes computation slice loop bounds for the loop nest surrounding
-/// 'srcAccess', where the returned loop bound AffineMaps are functions of
-/// loop IVs from the loop nest surrounding 'dstAccess'.
-/// Returns true on success, false otherwise.
-bool getBackwardComputationSliceState(const MemRefAccess &srcAccess,
- const MemRefAccess &dstAccess,
- unsigned dstLoopDepth,
- ComputationSliceState *sliceState);
-
-/// Creates a clone of the computation contained in the loop nest surrounding
-/// 'srcOpInst', slices the iteration space of src loop based on slice bounds
-/// in 'sliceState', and inserts the computation slice at the beginning of the
-/// instruction block of the loop at 'dstLoopDepth' in the loop nest surrounding
-/// 'dstOpInst'. Returns the top-level loop of the computation slice on
-/// success, returns nullptr otherwise.
-// Loop depth is a crucial optimization choice that determines where to
-// materialize the results of the backward slice - presenting a trade-off b/w
-// storage and redundant computation in several cases.
-// TODO(andydavis) Support computation slices with common surrounding loops.
-OpPointer<AffineForOp>
-insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
- unsigned dstLoopDepth,
- ComputationSliceState *sliceState);
-
/// Gets the memory footprint of all data touched in the specified memory space
/// in bytes; if the memory space is unspecified, considers all memory spaces.
Optional<int64_t> getMemoryFootprintBytes(ConstOpPointer<AffineForOp> forOp,
int memorySpace = -1);
Optional<int64_t> getMemoryFootprintBytes(const Block &block,
int memorySpace = -1);
-
} // end namespace mlir
#endif // MLIR_ANALYSIS_UTILS_H
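For orientation, a hypothetical call sequence for the two slice utilities declared in this header (wrapper and variable names are assumptions, not commit code); analyzing the slice's memref region without materializing it is the alternative path sketched after the commit message above:

// Sketch only: derive slice bounds of the source nest relative to the
// destination access, then materialize the slice in the destination nest.
static void fuseViaBackwardSlice(const MemRefAccess &srcAccess,
                                 const MemRefAccess &dstAccess,
                                 Instruction *srcOpInst, Instruction *dstOpInst,
                                 unsigned dstLoopDepth) {
  // Slice bounds become functions of the destination loop IVs.
  ComputationSliceState sliceState;
  if (!getBackwardComputationSliceState(srcAccess, dstAccess, dstLoopDepth,
                                        &sliceState))
    return;  // Slice bounds could not be computed.
  // Clone the source computation and insert the slice at 'dstLoopDepth' in the
  // destination nest; returns the slice's top-level loop, or null on failure.
  OpPointer<AffineForOp> sliceLoopNest = insertBackwardComputationSlice(
      srcOpInst, dstOpInst, dstLoopDepth, &sliceState);
  (void)sliceLoopNest;
}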
diff --git a/mlir/include/mlir/IR/AffineStructures.h b/mlir/include/mlir/IR/AffineStructures.h
index 9c88436dcb0..2acee9a7f39 100644
--- a/mlir/include/mlir/IR/AffineStructures.h
+++ b/mlir/include/mlir/IR/AffineStructures.h
@@ -378,6 +378,17 @@ public:
SmallVectorImpl<AffineMap> *lbMaps,
SmallVectorImpl<AffineMap> *ubMaps);
+ /// Adds slice bounds, represented by the lower bounds in 'lbMaps' and the
+ /// upper bounds in 'ubMaps', to the constraint system. Note that both the
+ /// lower and upper bounds share the same operand list 'operands'.
+ /// Assumes that 'lbMaps.size()' == 'ubMaps.size()', and that positions
+ /// [0, lbMaps.size()) represent dimensional identifiers which correspond to
+ /// the loop IVs whose iteration bounds are being sliced.
+ /// Returns true on success, returns false for unimplemented cases such as
+ /// semi-affine expressions or expressions with mod/floordiv.
+ bool addSliceBounds(ArrayRef<AffineMap> lbMaps, ArrayRef<AffineMap> ubMaps,
+ ArrayRef<Value *> operands);
+
// Adds an inequality (>= 0) from the coefficients specified in inEq.
void addInequality(ArrayRef<int64_t> inEq);
// Adds an equality from the coefficients specified in eq.
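As an illustrative worked example of the declared semantics (bound maps and values assumed, not taken from the commit): suppose the source loop IV %j sits at dimension 0 of the constraint system and is sliced with lbMaps[0] = (d0) -> (d0) and ubMaps[0] = (d0) -> (d0 + 8), with the destination IV %i as the single operand. Since slice upper bounds are exclusive, the call adds the inequalities

  %j - %i >= 0          // from the lower bound: %j >= %i
  -%j + %i + 7 >= 0     // from the exclusive upper bound: %j <= %i + 7

i.e. exactly the {%i <= %j <= %i + 7} form used in the MemRefRegion example in Utils.h above.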
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index 2e753f8d10a..bdc5d19d0be 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -122,7 +122,8 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
//
// TODO(bondhugula): extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
-bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
+bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth,
+ ComputationSliceState *sliceState) {
assert((inst->isa<LoadOp>() || inst->isa<StoreOp>()) &&
"load/store op expected");
@@ -147,18 +148,33 @@ bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
access.getAccessMap(&accessValueMap);
AffineMap accessMap = accessValueMap.getAffineMap();
+ unsigned numDims = accessMap.getNumDims();
+ unsigned numSymbols = accessMap.getNumSymbols();
+ unsigned numOperands = accessValueMap.getNumOperands();
+ // Merge operands with slice operands.
+ SmallVector<Value *, 4> operands;
+ operands.resize(numOperands);
+ for (unsigned i = 0; i < numOperands; ++i)
+ operands[i] = accessValueMap.getOperand(i);
+
+ if (sliceState != nullptr) {
+ // Append slice operands to 'operands' as symbols.
+ operands.append(sliceState->lbOperands[0].begin(),
+ sliceState->lbOperands[0].end());
+ // Update 'numSymbols' to account for the appended slice operands.
+ numSymbols += sliceState->lbOperands[0].size();
+ }
+
// We'll first associate the dims and symbols of the access map to the dims
// and symbols resp. of cst. This will change below once cst is
// fully constructed out.
- cst.reset(accessMap.getNumDims(), accessMap.getNumSymbols(), 0,
- accessValueMap.getOperands());
+ cst.reset(numDims, numSymbols, 0, operands);
// Add equality constraints.
- unsigned numDims = accessMap.getNumDims();
- unsigned numSymbols = accessMap.getNumSymbols();
// Add inequalties for loop lower/upper bounds.
for (unsigned i = 0; i < numDims + numSymbols; ++i) {
- if (auto loop = getForInductionVarOwner(accessValueMap.getOperand(i))) {
+ auto *operand = operands[i];
+ if (auto loop = getForInductionVarOwner(operand)) {
// Note that cst can now have more dimensions than accessMap if the
// bounds expressions involve outer loops or other symbols.
// TODO(bondhugula): rewrite this to use getInstIndexSet; this way
@@ -167,7 +183,7 @@ bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
return false;
} else {
// Has to be a valid symbol.
- auto *symbol = accessValueMap.getOperand(i);
+ auto *symbol = operand;
assert(isValidSymbol(symbol));
// Check if the symbol is a constant.
if (auto *inst = symbol->getDefiningInst()) {
@@ -178,6 +194,33 @@ bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
}
}
+ // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
+ if (sliceState != nullptr) {
+ // Add dim and symbol slice operands.
+ for (const auto &operand : sliceState->lbOperands[0]) {
+ unsigned loc;
+ if (!cst.findId(*operand, &loc)) {
+ if (isValidSymbol(operand)) {
+ cst.addSymbolId(cst.getNumSymbolIds(), const_cast<Value *>(operand));
+ loc = cst.getNumDimIds() + cst.getNumSymbolIds() - 1;
+ // Check if the symbol is a constant.
+ if (auto *opInst = operand->getDefiningInst()) {
+ if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
+ cst.setIdToConstant(*operand, constOp->getValue());
+ }
+ }
+ } else {
+ cst.addDimId(cst.getNumDimIds(), const_cast<Value *>(operand));
+ loc = cst.getNumDimIds() - 1;
+ }
+ }
+ }
+ // Add upper/lower bounds from 'sliceState' to 'cst'.
+ if (!cst.addSliceBounds(sliceState->lbs, sliceState->ubs,
+ sliceState->lbOperands[0]))
+ return false;
+ }
+
// Add access function equalities to connect loop IVs to data dimensions.
if (!cst.composeMap(&accessValueMap)) {
LLVM_DEBUG(llvm::dbgs() << "getMemRefRegion: compose affine map failed\n");
@@ -233,6 +276,32 @@ static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
return llvm::divideCeil(sizeInBits, 8);
}
+// Returns the size of the region.
+Optional<int64_t> MemRefRegion::getRegionSize() {
+ auto memRefType = memref->getType().cast<MemRefType>();
+
+ auto layoutMaps = memRefType.getAffineMaps();
+ if (layoutMaps.size() > 1 ||
+ (layoutMaps.size() == 1 && !layoutMaps[0].isIdentity())) {
+ LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
+ return None;
+ }
+
+ // Compute the extents of the buffer.
+ Optional<int64_t> numElements = getConstantBoundingSizeAndShape();
+ if (!numElements.hasValue()) {
+ LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
+ return None;
+ }
+ return getMemRefEltSizeInBytes(memRefType) * numElements.getValue();
+}
+
/// Returns the size of memref data in bytes if it's statically shaped, None
/// otherwise. If the element of the memref has vector type, takes into account
/// size of the vector as well.
@@ -420,8 +489,6 @@ bool mlir::getBackwardComputationSliceState(const MemRefAccess &srcAccess,
// entire destination index set. Subtract out the dependent destination
// iterations from destination index set and check for emptiness --- this is one
// solution.
-// TODO(andydavis) Remove dependence on 'srcLoopDepth' here. Instead project
-// out loop IVs we don't care about and produce smaller slice.
OpPointer<AffineForOp> mlir::insertBackwardComputationSlice(
Instruction *srcOpInst, Instruction *dstOpInst, unsigned dstLoopDepth,
ComputationSliceState *sliceState) {
@@ -537,33 +604,6 @@ unsigned mlir::getNumCommonSurroundingLoops(const Instruction &A,
return numCommonLoops;
}
-// Returns the size of the region.
-static Optional<int64_t> getRegionSize(const MemRefRegion &region) {
- auto *memref = region.memref;
- auto memRefType = memref->getType().cast<MemRefType>();
-
- auto layoutMaps = memRefType.getAffineMaps();
- if (layoutMaps.size() > 1 ||
- (layoutMaps.size() == 1 && !layoutMaps[0].isIdentity())) {
- LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
- return false;
- }
-
- // Indices to use for the DmaStart op.
- // Indices for the original memref being DMAed from/to.
- SmallVector<Value *, 4> memIndices;
- // Indices for the faster buffer being DMAed into/from.
- SmallVector<Value *, 4> bufIndices;
-
- // Compute the extents of the buffer.
- Optional<int64_t> numElements = region.getConstantBoundingSizeAndShape();
- if (!numElements.hasValue()) {
- LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
- return None;
- }
- return getMemRefEltSizeInBytes(memRefType) * numElements.getValue();
-}
-
Optional<int64_t>
mlir::getMemoryFootprintBytes(ConstOpPointer<AffineForOp> forOp,
int memorySpace) {
@@ -601,7 +641,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block,
int64_t totalSizeInBytes = 0;
for (const auto &region : regions) {
- auto size = getRegionSize(*region);
+ auto size = region->getRegionSize();
if (!size.hasValue())
return None;
totalSizeInBytes += size.getValue();
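A worked size computation for the new MemRefRegion::getRegionSize (element type assumed, not from the commit): for the region {%i <= m0 <= %i + 7} from the Utils.h documentation above, the constant bounding shape is 8 elements regardless of %i, so for an f32 memref:

  numElements       = (%i + 7) - %i + 1 = 8
  eltSizeInBytes    = 4                     // f32, assumed element type
  regionSizeInBytes = 8 * 4 = 32

Dynamic shapes and non-identity layout maps remain unsupported and yield no size, as the implementation above shows.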
diff --git a/mlir/lib/IR/AffineStructures.cpp b/mlir/lib/IR/AffineStructures.cpp
index 1306199dc79..d92b1b60182 100644
--- a/mlir/lib/IR/AffineStructures.cpp
+++ b/mlir/lib/IR/AffineStructures.cpp
@@ -1129,6 +1129,66 @@ void FlatAffineConstraints::getSliceBounds(unsigned num, MLIRContext *context,
}
}
+// Adds slice lower/upper bounds from 'lbMaps'/'ubMaps' to the constraint
+// system. This function assumes that 'lbMaps.size()' == 'ubMaps.size()', and
+// that positions [0, lbMaps.size()) represent dimensional identifiers which
+// correspond to the loop IVs whose iteration bounds are being sliced.
+// Note that both lower/upper bounds use operands from 'operands'.
+// Returns true on success. Returns false for unimplemented cases such as
+// semi-affine expressions or expressions with mod/floordiv.
+bool FlatAffineConstraints::addSliceBounds(ArrayRef<AffineMap> lbMaps,
+ ArrayRef<AffineMap> ubMaps,
+ ArrayRef<Value *> operands) {
+ assert(lbMaps.size() == ubMaps.size());
+ // Record positions of the operands in the constraint system.
+ SmallVector<unsigned, 8> positions;
+ for (const auto &operand : operands) {
+ unsigned loc;
+ if (!findId(*operand, &loc))
+ assert(0 && "expected to be found");
+ positions.push_back(loc);
+ }
+
+ auto addLowerOrUpperBound = [&](unsigned pos, AffineMap boundMap,
+ bool lower) -> bool {
+ FlatAffineConstraints localVarCst;
+ std::vector<SmallVector<int64_t, 8>> flatExprs;
+ if (!getFlattenedAffineExprs(boundMap, &flatExprs, &localVarCst)) {
+ LLVM_DEBUG(llvm::dbgs() << "semi-affine expressions not yet supported\n");
+ return false;
+ }
+ if (localVarCst.getNumLocalIds() > 0) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "loop bounds with mod/floordiv expr's not yet supported\n");
+ return false;
+ }
+
+ for (const auto &flatExpr : flatExprs) {
+ SmallVector<int64_t, 4> ineq(getNumCols(), 0);
+ ineq[pos] = lower ? 1 : -1;
+ for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) {
+ ineq[positions[j]] = lower ? -flatExpr[j] : flatExpr[j];
+ }
+ // Constant term.
+ ineq[getNumCols() - 1] =
+ lower ? -flatExpr[flatExpr.size() - 1]
+ // Upper bound in flattenedExpr is an exclusive one.
+ : flatExpr[flatExpr.size() - 1] - 1;
+ addInequality(ineq);
+ }
+ return true;
+ };
+
+ for (unsigned i = 0, e = lbMaps.size(); i < e; ++i) {
+ if (!addLowerOrUpperBound(i, lbMaps[i], /*lower=*/true))
+ return false;
+ if (!addLowerOrUpperBound(i, ubMaps[i], /*lower=*/false))
+ return false;
+ }
+
+ return true;
+}
+
void FlatAffineConstraints::addEquality(ArrayRef<int64_t> eq) {
assert(eq.size() == getNumCols());
unsigned offset = equalities.size();
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 3e0bf046ddf..8d5f51059bf 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -1118,12 +1118,23 @@ static bool isFusionProfitable(Instruction *srcOpInst,
/*tripCountOverrideMap=*/nullptr,
/*computeCostMap=*/nullptr);
+ // Compute src loop nest write region size.
+ MemRefRegion srcWriteRegion(srcOpInst->getLoc());
+ srcWriteRegion.compute(srcOpInst, /*loopDepth=*/0);
+ Optional<int64_t> maybeSrcWriteRegionSizeBytes =
+ srcWriteRegion.getRegionSize();
+ if (!maybeSrcWriteRegionSizeBytes.hasValue())
+ return false;
+ int64_t srcWriteRegionSizeBytes = maybeSrcWriteRegionSizeBytes.getValue();
+
// Compute op instance count for the src loop nest.
uint64_t dstLoopNestCost =
getComputeCost(dstLoopIVs[0]->getInstruction(), &dstLoopNestStats,
/*tripCountOverrideMap=*/nullptr,
/*computeCostMap=*/nullptr);
+ // Evaluate all depth choices for materializing the slice in the destination
+ // loop nest.
llvm::SmallDenseMap<Instruction *, uint64_t, 8> sliceTripCountMap;
DenseMap<Instruction *, int64_t> computeCostMap;
for (unsigned i = maxDstLoopDepth; i >= 1; --i) {
@@ -1187,11 +1198,21 @@ static bool isFusionProfitable(Instruction *srcOpInst,
(static_cast<double>(srcLoopNestCost) + dstLoopNestCost) -
1;
- // TODO(bondhugula): This is an ugly approximation. Fix this by finding a
- // good way to calculate the footprint of the memref in the slice and
- // divide it by the total memory footprint of the fused computation.
- double storageReduction =
- static_cast<double>(srcLoopNestCost) / sliceIterationCount;
+ // Compute what the slice write MemRefRegion would be if the src loop
+ // nest slice 'sliceStates[i - 1]' were to be inserted into the dst loop
+ // nest at loop depth 'i'.
+ MemRefRegion sliceWriteRegion(srcOpInst->getLoc());
+ sliceWriteRegion.compute(srcOpInst, /*loopDepth=*/0, &sliceStates[i - 1]);
+ Optional<int64_t> maybeSliceWriteRegionSizeBytes =
+ sliceWriteRegion.getRegionSize();
+ if (!maybeSliceWriteRegionSizeBytes.hasValue() ||
+ maybeSliceWriteRegionSizeBytes.getValue() == 0)
+ continue;
+ int64_t sliceWriteRegionSizeBytes =
+ maybeSliceWriteRegionSizeBytes.getValue();
+
+ double storageReduction = static_cast<double>(srcWriteRegionSizeBytes) /
+ static_cast<double>(sliceWriteRegionSizeBytes);
LLVM_DEBUG({
std::stringstream msg;
@@ -1219,12 +1240,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
maxStorageReduction = storageReduction;
bestDstLoopDepth = i;
minFusedLoopNestComputeCost = fusedLoopNestComputeCost;
- // TODO(bondhugula,andydavis): find a good way to compute the memory
- // footprint of the materialized slice.
- // Approximating this to the compute cost of the slice. This could be an
- // under-approximation or an overapproximation, but in many cases
- // accurate.
- sliceMemEstimate = sliceIterationCount;
+ sliceMemEstimate = sliceWriteRegionSizeBytes;
}
}
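With these changes, the storage-reduction metric compares the source nest's full write region against the slice's write region at each candidate insertion depth, rather than approximating it from compute cost. For illustration (sizes assumed, not from the commit), if the source nest writes a 256x256 f32 memref and the slice at depth 'i' would write only one 256-element row:

  srcWriteRegionSizeBytes   = 256 * 256 * 4 = 262144
  sliceWriteRegionSizeBytes = 256 * 4       = 1024
  storageReduction          = 262144 / 1024 = 256

Candidate depths whose slices rewrite only a small portion of the source region therefore score a proportionally higher storage reduction.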