author    Uday Bondhugula <bondhugula@google.com>  2018-12-29 19:16:55 -0800
committer jpienaar <jpienaar@google.com>  2019-03-29 14:47:28 -0700
commit    b9fe6be6d4cefdd6aefeaee2c7ab5c475efd8e4f (patch)
tree      fa34ed9efdedf94977001bd4a9449475b261a34a /mlir/lib/Transforms/MemRefDataFlowOpt.cpp
parent    6e3462d2518f5369f01ac8c93ce9ed3e28b43725 (diff)
Introduce memref store to load forwarding - a simple memref dataflow analysis
- The load/store forwarding relies on memref dependence routines as well as
  SSA/dominance to uniquely identify the memref store instance supplying a
  value to a memref load, and replaces the result of that load with the value
  being stored. The memref is also deleted when possible, if only stores
  remain.
- Add methods for post dominance for MLFunction blocks.
- Remove duplicated getLoopDepth/getNestingDepth.
- Move getNestingDepth, getMemRefAccess, getNumCommonSurroundingLoops into
  Analysis/Utils (they were earlier static).
- Add a helper method in FlatAffineConstraints - isRangeOneToOne.

PiperOrigin-RevId: 227252907
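
As a sketch of what the pass does (illustrative IR, not taken from this patch):

  for %i0 = 0 to 128 {
    store %v, %m[%i0]
    %u = load %m[%i0]
    ...
  }

Here the store is the unique dominating (and postdominating) writer to the
location read by the load, so all uses of %u are replaced by %v and the load
is erased; if %m is then left with only stores (and a dealloc), those and the
alloc are erased as well.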
Diffstat (limited to 'mlir/lib/Transforms/MemRefDataFlowOpt.cpp')
-rw-r--r--  mlir/lib/Transforms/MemRefDataFlowOpt.cpp  |  243
1 file changed, 243 insertions(+), 0 deletions(-)
diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
new file mode 100644
index 00000000000..d1af131d383
--- /dev/null
+++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
@@ -0,0 +1,243 @@
+//===- MemRefDataFlowOpt.cpp - MemRef DataFlow Optimization pass ---------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements a pass to forward memref stores to loads, thereby
+// potentially getting rid of intermediate memrefs entirely.
+// TODO(mlir-team): In the future, similar techniques could be used to
+// eliminate dead memref stores and perform more complex forwarding when
+// support for SSA scalars live out of 'for'/'if' statements is available.
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/AffineAnalysis.h"
+#include "mlir/Analysis/Utils.h"
+#include "mlir/IR/InstVisitor.h"
+#include "mlir/Pass.h"
+#include "mlir/StandardOps/StandardOps.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "memref-dataflow-opt"
+
+using namespace mlir;
+
+namespace {
+
+// The store to load forwarding relies on three conditions:
+//
+// 1) there has to be a dependence from the store to the load satisfied at the
+// block immediately within the innermost common surrounding loop of the load
+// op and the store op, and such a dependence must associate a single load
+// location with a given source store iteration.
+//
+// 2) the store op should dominate the load op,
+//
+// 3) among all candidate store ops that satisfy (1) and (2), if there exists a
+// store op that postdominates all those that satisfy (1), such a store op is
+// provably the last writer to the particular memref location being loaded from
+// by the load op, and its store value can be forwarded to the load.
+//
+// The above conditions are simple to check, sufficient, and powerful for most
+// cases in practice - conditions (1) and (3) are precise and necessary, while
+// condition (2) is sufficient but not necessary (since it doesn't reason about
+// loops that are guaranteed to execute at least once).
+//
+// TODO(mlir-team): more forwarding can be done when support for
+// loop/conditional live-out SSA values is available.
+// TODO(mlir-team): do general dead store elimination for memrefs. This pass
+// currently eliminates stores only if no other loads/uses (other than
+// dealloc) remain.
+//
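+// As a purely illustrative example, in:
+//
+//   for %i0 ... {
+//     store %v0, %A[%i0]
+//     store %v1, %A[%i0]
+//     ... = load %A[%i0]
+//   }
+//
+// both stores satisfy (1) and (2); the second store postdominates the first,
+// so by (3) it is provably the last writer, and %v1 is forwarded to the load.
+//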
+struct MemRefDataFlowOpt : public FunctionPass, InstWalker<MemRefDataFlowOpt> {
+ explicit MemRefDataFlowOpt() : FunctionPass(&MemRefDataFlowOpt::passID) {}
+
+ // Not applicable to CFG functions.
+ PassResult runOnCFGFunction(Function *f) override { return success(); }
+ PassResult runOnMLFunction(Function *f) override;
+
+ void visitOperationInst(OperationInst *opInst);
+
+  // A list of memrefs that are potentially dead / could be eliminated.
+ std::vector<Value *> memrefsToErase;
+
+ static char passID;
+};
+
+} // end anonymous namespace
+
+char MemRefDataFlowOpt::passID = 0;
+
+/// Creates a pass to perform optimizations relying on memref dataflow such as
+/// store to load forwarding, elimination of dead stores, and dead allocs.
+FunctionPass *mlir::createMemRefDataFlowOptPass() {
+ return new MemRefDataFlowOpt();
+}
+
+// This is a straightforward implementation not optimized for speed. Optimize
+// this in the future if needed.
+void MemRefDataFlowOpt::visitOperationInst(OperationInst *opInst) {
+ OperationInst *lastWriteStoreOp = nullptr;
+
+ auto loadOp = opInst->dyn_cast<LoadOp>();
+ if (!loadOp)
+ return;
+
+ OperationInst *loadOpInst = opInst;
+
+  // First pass over the use list to get the minimum number of surrounding
+  // loops common between the load op and the store op, with the min taken
+  // across all store ops.
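+  // For instance (illustrative): if the load is nested within loops (%i0, %i1)
+  // while one store shares both loops and another shares only %i0, the minimum
+  // is one; per the note below, only dependences at depths greater than the
+  // minimum matter.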
+ SmallVector<OperationInst *, 8> storeOps;
+ unsigned minSurroundingLoops = getNestingDepth(*loadOpInst);
+ for (InstOperand &use : loadOp->getMemRef()->getUses()) {
+ auto storeOp = cast<OperationInst>(use.getOwner())->dyn_cast<StoreOp>();
+ if (!storeOp)
+ continue;
+ auto *storeOpInst = storeOp->getInstruction();
+ unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
+ minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
+ storeOps.push_back(storeOpInst);
+ }
+
+ // 1. Check if there is a dependence satisfied at depth equal to the depth
+ // of the loop body of the innermost common surrounding loop of the storeOp
+ // and loadOp.
+  // The list of store op candidates for forwarding - they need to satisfy the
+  // conditions listed at the top.
+ SmallVector<OperationInst *, 8> fwdingCandidates;
+  // Store ops that have a dependence into the load (even if they aren't
+  // forwarding candidates). Each forwarding candidate will be checked for
+  // postdominance over these. 'fwdingCandidates' is a subset of 'depSrcStores'.
+ SmallVector<OperationInst *, 8> depSrcStores;
+ for (auto *storeOpInst : storeOps) {
+ MemRefAccess srcAccess, destAccess;
+ getMemRefAccess(storeOpInst, &srcAccess);
+ getMemRefAccess(loadOpInst, &destAccess);
+ FlatAffineConstraints dependenceConstraints;
+ unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
+ // Dependences at loop depth <= minSurroundingLoops do NOT matter.
+ for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
+ if (!checkMemrefAccessDependence(srcAccess, destAccess, d,
+ &dependenceConstraints,
+ /*dependenceComponents=*/nullptr))
+ continue;
+ depSrcStores.push_back(storeOpInst);
+      // Check if this store is a candidate for forwarding; we only forward if
+      // the dependence from the store is carried by the *body* of the
+      // innermost common surrounding loop. As an example, this filters out
+      // cases like:
+ // for %i0
+ // for %i1
+ // %idx = affine_apply (d0) -> (d0 + 1) (%i0)
+ // store %A[%idx]
+ // load %A[%i0]
+ //
+ if (d != nsLoops + 1)
+ break;
+
+      // 2. The store has to dominate the load op to be a candidate. This is not
+ // strictly a necessary condition since dominance isn't a prerequisite for
+ // a memref element store to reach a load, but this is sufficient and
+ // reasonably powerful in practice.
+ if (!dominates(*storeOpInst, *loadOpInst))
+ break;
+
+      // Finally, forwarding is only possible if the load touches a single
+      // location in the memref across the enclosing loops *not* common with
+      // the store. This filters out cases like:
+ // for (i ...)
+ // a [i] = ...
+ // for (j ...)
+ // ... = a[j]
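+      // 'region' captures the set of memref elements the load accesses,
+      // parametric on the IVs of the 'nsLoops' loops common with the store;
+      // requiring this range to be one-to-one (assumed semantics of
+      // isRangeOneToOne) ensures the load reads a single location across the
+      // non-common loops.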
+ MemRefRegion region;
+ getMemRefRegion(loadOpInst, nsLoops, &region);
+ if (!region.getConstraints()->isRangeOneToOne(
+ /*start=*/0, /*limit=*/loadOp->getMemRefType().getRank()))
+ break;
+
+ // After all these conditions, we have a candidate for forwarding!
+ fwdingCandidates.push_back(storeOpInst);
+ break;
+ }
+ }
+
+  // Note: this could be implemented in a cleaner way with postdominator tree
+  // traversals. Consider this for the future if needed.
+ for (auto *storeOpInst : fwdingCandidates) {
+ // 3. Of all the store op's that meet the above criteria, the store
+ // that postdominates all 'depSrcStores' (if such a store exists) is the
+ // unique store providing the value to the load, i.e., provably the last
+ // writer to that memref loc.
+ if (llvm::all_of(depSrcStores, [&](OperationInst *depStore) {
+ return postDominates(*storeOpInst, *depStore);
+ })) {
+ lastWriteStoreOp = storeOpInst;
+ break;
+ }
+ }
+  // TODO: future optimization: those store ops that are determined to be
+  // postdominated above can be recorded and skipped in the loop above, since
+  // they can never postdominate everything.
+
+ if (!lastWriteStoreOp)
+ return;
+
+ // Perform the actual store to load forwarding.
+ Value *storeVal = lastWriteStoreOp->cast<StoreOp>()->getValueToStore();
+ loadOp->getResult()->replaceAllUsesWith(storeVal);
+ // Record the memref for a later sweep to optimize away.
+ memrefsToErase.push_back(loadOp->getMemRef());
+ loadOp->erase();
+}
+
+PassResult MemRefDataFlowOpt::runOnMLFunction(Function *f) {
+ memrefsToErase.clear();
+
+  // Walk all loads and perform store to load forwarding.
+ walk(f);
+
+  // Check if the store-forwarded memrefs are now left with only stores and
+  // can thus be completely deleted. Note: the canonicalize pass should be
+  // able to do this as well, but we'll do it here since we collected these
+  // anyway.
+ for (auto *memref : memrefsToErase) {
+ // If the memref hasn't been alloc'ed in this function, skip.
+ OperationInst *defInst = memref->getDefiningInst();
+    if (!defInst || !defInst->isa<AllocOp>())
+ // TODO(mlir-team): if the memref was returned by a 'call' instruction, we
+ // could still erase it if the call has no side-effects.
+ continue;
+ if (std::any_of(memref->use_begin(), memref->use_end(),
+ [&](InstOperand &use) {
+ auto *ownerInst = cast<OperationInst>(use.getOwner());
+ return (!ownerInst->isa<StoreOp>() &&
+ !ownerInst->isa<DeallocOp>());
+ }))
+ continue;
+
+ // Erase all stores, the dealloc, and the alloc on the memref.
+ for (auto it = memref->use_begin(), e = memref->use_end(); it != e;) {
+ auto &use = *(it++);
+ cast<OperationInst>(use.getOwner())->erase();
+ }
+ defInst->erase();
+ }
+
+ // This function never leaves the IR in an invalid state.
+ return success();
+}
+
+static PassRegistration<MemRefDataFlowOpt>
+ pass("memref-dataflow-opt", "Perform store/load forwarding for memrefs");