| | | |
|---|---|---|
| author | Uday Bondhugula <bondhugula@google.com> | 2018-12-29 19:16:55 -0800 |
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 14:47:28 -0700 |
| commit | b9fe6be6d4cefdd6aefeaee2c7ab5c475efd8e4f (patch) | |
| tree | fa34ed9efdedf94977001bd4a9449475b261a34a /mlir/lib/Transforms/MemRefDataFlowOpt.cpp | |
| parent | 6e3462d2518f5369f01ac8c93ce9ed3e28b43725 (diff) | |
| download | bcm5719-llvm-b9fe6be6d4cefdd6aefeaee2c7ab5c475efd8e4f.tar.gz, bcm5719-llvm-b9fe6be6d4cefdd6aefeaee2c7ab5c475efd8e4f.zip | |
Introduce memref store to load forwarding - a simple memref dataflow analysis
- The store-to-load forwarding relies on the memref dependence routines as well
  as SSA/dominance information to identify the memref store instance that
  uniquely supplies the value to a memref load, and replaces the result of that
  load with the value being stored (see the schematic sketch after this
  message). The memref itself is also deleted when possible, once only stores
  remain on it.
- Add post-dominance methods for MLFunction blocks.
- Remove the duplicated getLoopDepth/getNestingDepth; move getNestingDepth,
  getMemRefAccess, and getNumCommonSurroundingLoops into Analysis/Utils (they
  were previously static).
- Add a helper method, isRangeOneToOne, to FlatAffineConstraints.
PiperOrigin-RevId: 227252907
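As a schematic sketch of the transformation (written in the style of the IR snippets used in the pass comments below; the memref %A, the values %v0/%v1, the op "foo", and the loop bounds are hypothetical and not taken from the patch), a load whose unique last writer is a dominating store in the same iteration is replaced by the stored value:

```
// Before: %A[%i0] is written and then read back in the same iteration.
for %i0 = 0 to 16 {
  store %v0, %A[%i0]
  %v1 = load %A[%i0]   // uniquely supplied by the store above
  "foo"(%v1)
}

// After store-to-load forwarding; once no loads of %A remain, the alloc,
// the stores, and any dealloc of %A can be erased as well.
for %i0 = 0 to 16 {
  "foo"(%v0)
}
```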
Diffstat (limited to 'mlir/lib/Transforms/MemRefDataFlowOpt.cpp')
| mode | path | insertions |
|---|---|---|
| -rw-r--r-- | mlir/lib/Transforms/MemRefDataFlowOpt.cpp | 243 |

1 file changed, 243 insertions, 0 deletions
```diff
diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
new file mode 100644
index 00000000000..d1af131d383
--- /dev/null
+++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
@@ -0,0 +1,243 @@
```

```cpp
//===- MemRefDataFlowOpt.cpp - MemRef DataFlow Optimization pass ------ -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements a pass to forward memref stores to loads, thereby
// potentially getting rid of intermediate memref's entirely.
// TODO(mlir-team): In the future, similar techniques could be used to
// eliminate dead memref store's and perform more complex forwarding when
// support for SSA scalars live out of 'for'/'if' statements is available.
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/IR/InstVisitor.h"
#include "mlir/Pass.h"
#include "mlir/StandardOps/StandardOps.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>

#define DEBUG_TYPE "memref-dataflow-opt"

using namespace mlir;

namespace {

// The store to load forwarding relies on three conditions:
//
// 1) there has to be a dependence from the store to the load satisfied at the
// block immediately within the innermost common surrounding loop of the load
// op and the store op, and such a dependence should associate with a single
// load location for a given source store iteration.
//
// 2) the store op should dominate the load op,
//
// 3) among all candidate store op's that satisfy (1) and (2), if there exists
// a store op that postdominates all those that satisfy (1), such a store op
// is provably the last writer to the particular memref location being loaded
// from by the load op, and its store value can be forwarded to the load.
//
// The above conditions are simple to check, sufficient, and powerful for most
// cases in practice - conditions (1) and (3) are precise and necessary, while
// condition (2) is a sufficient one but not necessary (since it doesn't
// reason about loops that are guaranteed to execute at least once).
//
// TODO(mlir-team): more forwarding can be done when support for
// loop/conditional live-out SSA values is available.
// TODO(mlir-team): do general dead store elimination for memref's. This pass
// currently eliminates the stores only if no other loads/uses (other than
// dealloc) remain.
//
struct MemRefDataFlowOpt : public FunctionPass, InstWalker<MemRefDataFlowOpt> {
  explicit MemRefDataFlowOpt() : FunctionPass(&MemRefDataFlowOpt::passID) {}

  // Not applicable to CFG functions.
  PassResult runOnCFGFunction(Function *f) override { return success(); }
  PassResult runOnMLFunction(Function *f) override;

  void visitOperationInst(OperationInst *opInst);

  // A list of memref's that are potentially dead / could be eliminated.
  std::vector<Value *> memrefsToErase;

  static char passID;
};

} // end anonymous namespace

char MemRefDataFlowOpt::passID = 0;

/// Creates a pass to perform optimizations relying on memref dataflow such as
/// store to load forwarding, elimination of dead stores, and dead allocs.
FunctionPass *mlir::createMemRefDataFlowOptPass() {
  return new MemRefDataFlowOpt();
}

// This is a straightforward implementation not optimized for speed. Optimize
// this in the future if needed.
void MemRefDataFlowOpt::visitOperationInst(OperationInst *opInst) {
  OperationInst *lastWriteStoreOp = nullptr;

  auto loadOp = opInst->dyn_cast<LoadOp>();
  if (!loadOp)
    return;

  OperationInst *loadOpInst = opInst;

  // First pass over the use list to get the minimum number of surrounding
  // loops common between the load op and the store op, with min taken across
  // all store ops.
  SmallVector<OperationInst *, 8> storeOps;
  unsigned minSurroundingLoops = getNestingDepth(*loadOpInst);
  for (InstOperand &use : loadOp->getMemRef()->getUses()) {
    auto storeOp = cast<OperationInst>(use.getOwner())->dyn_cast<StoreOp>();
    if (!storeOp)
      continue;
    auto *storeOpInst = storeOp->getInstruction();
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
    storeOps.push_back(storeOpInst);
  }

  // 1. Check if there is a dependence satisfied at depth equal to the depth
  // of the loop body of the innermost common surrounding loop of the storeOp
  // and loadOp.
  // The list of store op candidates for forwarding - they need to satisfy the
  // conditions listed at the top.
  SmallVector<OperationInst *, 8> fwdingCandidates;
  // Store ops that have a dependence into the load (even if they aren't
  // forwarding candidates). Each fwding candidate will be checked for
  // post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
  SmallVector<OperationInst *, 8> depSrcStores;
  for (auto *storeOpInst : storeOps) {
    MemRefAccess srcAccess, destAccess;
    getMemRefAccess(storeOpInst, &srcAccess);
    getMemRefAccess(loadOpInst, &destAccess);
    FlatAffineConstraints dependenceConstraints;
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    // Dependences at loop depth <= minSurroundingLoops do NOT matter.
    for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
      if (!checkMemrefAccessDependence(srcAccess, destAccess, d,
                                       &dependenceConstraints,
                                       /*dependenceComponents=*/nullptr))
        continue;
      depSrcStores.push_back(storeOpInst);
      // Check if this store is a candidate for forwarding; we only forward if
      // the dependence from the store is carried by the *body* of the
      // innermost common surrounding loop. As an example, this filters out
      // cases like:
      //   for %i0
      //     for %i1
      //       %idx = affine_apply (d0) -> (d0 + 1) (%i0)
      //       store %A[%idx]
      //       load %A[%i0]
      //
      if (d != nsLoops + 1)
        break;

      // 2. The store has to dominate the load op to be a candidate. This is
      // not strictly a necessary condition since dominance isn't a
      // prerequisite for a memref element store to reach a load, but it is
      // sufficient and reasonably powerful in practice.
      if (!dominates(*storeOpInst, *loadOpInst))
        break;

      // Finally, forwarding is only possible if the load touches a single
      // location in the memref across the enclosing loops *not* common with
      // the store. This is filtering out cases like:
      //   for (i ...)
      //     a[i] = ...
      //   for (j ...)
      //     ... = a[j]
      MemRefRegion region;
      getMemRefRegion(loadOpInst, nsLoops, &region);
      if (!region.getConstraints()->isRangeOneToOne(
              /*start=*/0, /*limit=*/loadOp->getMemRefType().getRank()))
        break;

      // After all these conditions, we have a candidate for forwarding!
      fwdingCandidates.push_back(storeOpInst);
      break;
    }
  }

  // Note: this can be implemented in a cleaner way with postdominator tree
  // traversals. Consider this for the future if needed.
  for (auto *storeOpInst : fwdingCandidates) {
    // 3. Of all the store op's that meet the above criteria, the store that
    // postdominates all 'depSrcStores' (if such a store exists) is the unique
    // store providing the value to the load, i.e., provably the last writer
    // to that memref loc.
    if (llvm::all_of(depSrcStores, [&](OperationInst *depStore) {
          return postDominates(*storeOpInst, *depStore);
        })) {
      lastWriteStoreOp = storeOpInst;
      break;
    }
  }
  // TODO: optimization for the future: those store op's that are determined
  // to be postdominated above can actually be recorded and skipped on the 'i'
  // loop iteration above, since they can never postdominate everything.

  if (!lastWriteStoreOp)
    return;

  // Perform the actual store to load forwarding.
  Value *storeVal = lastWriteStoreOp->cast<StoreOp>()->getValueToStore();
  loadOp->getResult()->replaceAllUsesWith(storeVal);
  // Record the memref for a later sweep to optimize away.
  memrefsToErase.push_back(loadOp->getMemRef());
  loadOp->erase();
}

PassResult MemRefDataFlowOpt::runOnMLFunction(Function *f) {
  memrefsToErase.clear();

  // Walk all load's and perform load/store forwarding.
  walk(f);

  // Check if the store fwd'ed memrefs are now left with only stores and can
  // thus be completely deleted. Note: the canonicalize pass should be able
  // to do this as well, but we'll do it here since we collected these anyway.
  for (auto *memref : memrefsToErase) {
    // If the memref hasn't been alloc'ed in this function, skip.
    OperationInst *defInst = memref->getDefiningInst();
    if (!defInst || !cast<OperationInst>(defInst)->isa<AllocOp>())
      // TODO(mlir-team): if the memref was returned by a 'call' instruction,
      // we could still erase it if the call has no side-effects.
      continue;
    if (std::any_of(memref->use_begin(), memref->use_end(),
                    [&](InstOperand &use) {
                      auto *ownerInst = cast<OperationInst>(use.getOwner());
                      return (!ownerInst->isa<StoreOp>() &&
                              !ownerInst->isa<DeallocOp>());
                    }))
      continue;

    // Erase all stores, the dealloc, and the alloc on the memref.
    for (auto it = memref->use_begin(), e = memref->use_end(); it != e;) {
      auto &use = *(it++);
      cast<OperationInst>(use.getOwner())->erase();
    }
    defInst->erase();
  }

  // This function never leaves the IR in an invalid state.
  return success();
}

static PassRegistration<MemRefDataFlowOpt>
    pass("memref-dataflow-opt", "Perform store/load forwarding for memrefs");
```
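For reference, the PassRegistration at the end of the new file exposes the pass under the "memref-dataflow-opt" flag, so, assuming the mlir-opt driver of this vintage picks up registered passes in the usual way, the pass would be exercised roughly as follows (the input file name is hypothetical):

```sh
# Sketch only: the exact driver behavior may differ at this revision.
mlir-opt -memref-dataflow-opt intermediate_memref.mlir
```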

