-rw-r--r--  mlir/include/mlir/Transforms/Passes.h                 |   4
-rw-r--r--  mlir/lib/Transforms/CMakeLists.txt                    |   1
-rw-r--r--  mlir/lib/Transforms/LoopInvariantCodeMotion.cpp       | 130
-rw-r--r--  mlir/test/Transforms/loop-invariant-code-motion.mlir  | 189
4 files changed, 324 insertions, 0 deletions
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 845e398debd..dc5d57fba4e 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -83,6 +83,10 @@ FunctionPassBase *createLoopFusionPass(unsigned fastMemorySpace = 0,
                                        uint64_t localBufSizeThreshold = 0,
                                        bool maximalFusion = false);
 
+/// Creates a loop invariant code motion pass that hoists loop invariant
+/// instructions out of the loop.
+FunctionPassBase *createLoopInvariantCodeMotionPass();
+
 /// Creates a pass to pipeline explicit movement of data across levels of the
 /// memory hierarchy.
 FunctionPassBase *createPipelineDataTransferPass();
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 30efc1e7436..9e7083f2e00 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(MLIRTransforms
   DialectConversion.cpp
   DmaGeneration.cpp
   LoopFusion.cpp
+  LoopInvariantCodeMotion.cpp
   LoopTiling.cpp
   LoopUnrollAndJam.cpp
   LoopUnroll.cpp
diff --git a/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
new file mode 100644
index 00000000000..b1bee484709
--- /dev/null
+++ b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
@@ -0,0 +1,130 @@
+//===- LoopInvariantCodeMotion.cpp - Loop invariant code motion ----------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements loop invariant code motion.
+//
+//===----------------------------------------------------------------------===//
+
+#include <iomanip>
+#include <sstream>
+
+#include "mlir/AffineOps/AffineOps.h"
+#include "mlir/Analysis/AffineAnalysis.h"
+#include "mlir/Analysis/AffineStructures.h"
+#include "mlir/Analysis/LoopAnalysis.h"
+#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Analysis/Utils.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/StandardOps/Ops.h"
+#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Transforms/Utils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "licm"
+
+using llvm::SetVector;
+
+using namespace mlir;
+
+namespace {
+
+/// Loop invariant code motion (LICM) pass.
+/// TODO(asabne): The pass is missing zero-trip tests.
+/// TODO(asabne): Check for the presence of side effects before hoisting.
+struct LoopInvariantCodeMotion : public FunctionPass<LoopInvariantCodeMotion> {
+  void runOnFunction() override;
+  void runOnAffineForOp(AffineForOp forOp);
+  std::vector<AffineForOp> forOps;
+};
+} // end anonymous namespace
+
+FunctionPassBase *mlir::createLoopInvariantCodeMotionPass() {
+  return new LoopInvariantCodeMotion();
+}
+
+void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
+  auto *loopBody = forOp.getBody();
+
+  // This is the place where hoisted instructions would reside.
+  FuncBuilder b(forOp.getOperation());
+
+  // This vector holds the loop invariant operations to be hoisted.
+  SmallVector<Operation *, 8> opsToMove;
+
+  SetVector<Operation *> loopDefinedOps;
+  // Compute the forward slice: the ops in the transitive definition closure
+  // rooted at the loop's induction variable.
+  getForwardSlice(forOp, &loopDefinedOps);
+
+  for (auto i : loopDefinedOps) {
+    LLVM_DEBUG(i->print(llvm::dbgs() << "\nLoop-dependent op\n"));
+  }
+
+  for (auto &op : *loopBody) {
+    // If the operation is loop invariant, insert it into opsToMove.
+    if (!op.isa<AffineForOp>() && !op.isa<AffineTerminatorOp>() &&
+        loopDefinedOps.count(&op) != 1) {
+      LLVM_DEBUG(op.print(llvm::dbgs() << "\nLICM'ing op\n"));
+      opsToMove.push_back(&op);
+    }
+  }
+
+  // Place all instructions that we found to be invariant sequentially right
+  // before the for loop.
+  for (auto *op : opsToMove) {
+    op->moveBefore(forOp);
+  }
+
+  LLVM_DEBUG(forOp.getOperation()->print(llvm::dbgs() << "\nModified loop\n"));
+
+  // If the for loop body now holds only its terminator, erase the loop.
+  if (forOp.getBody()->getOperations().size() == 1) {
+    assert(forOp.getBody()->getOperations().front().isa<AffineTerminatorOp>());
+    forOp.erase();
+  }
+}
+
+void LoopInvariantCodeMotion::runOnFunction() {
+  forOps.clear();
+
+  // Gather all loops in the function and order them innermost-first. This
+  // way, ops are first LICM'ed out of the inner loop into the outer loop,
+  // where they can in turn be LICM'ed further. It also avoids re-iterating
+  // over the inner loop's operations while LICM'ing through the outer loop.
+  getFunction().walk<AffineForOp>(
+      [&](AffineForOp forOp) { forOps.push_back(forOp); });
+  // Gather the loops first and only then process them, so that moving and
+  // erasing operations does not invalidate the walk's iterators.
+  for (auto forOp : forOps) {
+    auto *forInst = forOp.getOperation();
+    LLVM_DEBUG(forInst->print(llvm::dbgs() << "\nOriginal loop\n"));
+    runOnAffineForOp(forOp);
+  }
+}
+
+static PassRegistration<LoopInvariantCodeMotion>
+    pass("loop-invariant-code-motion",
+         "Hoist loop invariant instructions outside of the loop");
diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir
new file mode 100644
index 00000000000..f2276d8d83c
--- /dev/null
+++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir
@@ -0,0 +1,189 @@
+// RUN: mlir-opt %s -loop-invariant-code-motion -split-input-file -verify | FileCheck %s
+
+func @nested_loops_both_having_invariant_code() {
+  %m = alloc() : memref<10xf32>
+  %cf7 = constant 7.0 : f32
+  %cf8 = constant 8.0 : f32
+
+  affine.for %i0 = 0 to 10 {
+    %v0 = addf %cf7, %cf8 : f32
+    affine.for %i1 = 0 to 10 {
+      store %v0, %m[%i0] : memref<10xf32>
+    }
+  }
+
+  // CHECK: %0 = alloc() : memref<10xf32>
+  // CHECK-NEXT: %cst = constant 7.000000e+00 : f32
+  // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
+  // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
+  // CHECK-NEXT: affine.for %i0 = 0 to 10 {
+  // CHECK-NEXT:   store %1, %0[%i0] : memref<10xf32>
+  // CHECK-NEXT: }
+  // CHECK-NEXT: return
+  return
+}
+
+// The store-load forwarding can see through affine.apply ops since it relies
+// on dependence information.
+// CHECK-LABEL: func @store_affine_apply
+func @store_affine_apply() -> memref<10xf32> {
+  %cf7 = constant 7.0 : f32
+  %m = alloc() : memref<10xf32>
+  affine.for %i0 = 0 to 10 {
+    %t0 = affine.apply (d1) -> (d1 + 1)(%i0)
+    store %cf7, %m[%t0] : memref<10xf32>
+  }
+  return %m : memref<10xf32>
+// CHECK: %cst = constant 7.000000e+00 : f32
+// CHECK-NEXT: %0 = alloc() : memref<10xf32>
+// CHECK-NEXT: affine.for %i0 = 0 to 10 {
+// CHECK-NEXT:   %1 = affine.apply #map2(%i0)
+// CHECK-NEXT:   store %cst, %0[%1] : memref<10xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: return %0 : memref<10xf32>
+}
+
+func @nested_loops_code_invariant_to_both() {
+  %m = alloc() : memref<10xf32>
+  %cf7 = constant 7.0 : f32
+  %cf8 = constant 8.0 : f32
+
+  affine.for %i0 = 0 to 10 {
+    affine.for %i1 = 0 to 10 {
+      %v0 = addf %cf7, %cf8 : f32
+    }
+  }
+
+  // CHECK: %0 = alloc() : memref<10xf32>
+  // CHECK-NEXT: %cst = constant 7.000000e+00 : f32
+  // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
+  // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
+  // CHECK-NEXT: return
+  return
+}
+
+func @single_loop_nothing_invariant() {
+  %m1 = alloc() : memref<10xf32>
+  %m2 = alloc() : memref<10xf32>
+  affine.for %i0 = 0 to 10 {
+    %v0 = load %m1[%i0] : memref<10xf32>
+    %v1 = load %m2[%i0] : memref<10xf32>
+    %v2 = addf %v0, %v1 : f32
+    store %v2, %m1[%i0] : memref<10xf32>
+  }
+
+  // CHECK: %0 = alloc() : memref<10xf32>
+  // CHECK-NEXT: %1 = alloc() : memref<10xf32>
+  // CHECK-NEXT: affine.for %i0 = 0 to 10 {
+  // CHECK-NEXT:   %2 = load %0[%i0] : memref<10xf32>
+  // CHECK-NEXT:   %3 = load %1[%i0] : memref<10xf32>
+  // CHECK-NEXT:   %4 = addf %2, %3 : f32
+  // CHECK-NEXT:   store %4, %0[%i0] : memref<10xf32>
+  // CHECK-NEXT: }
+  // CHECK-NEXT: return
+  return
+}
+
+
+func @invariant_code_inside_affine_if() {
+  %m = alloc() : memref<10xf32>
+  %cf8 = constant 8.0 : f32
+
+  affine.for %i0 = 0 to 10 {
+    %t0 = affine.apply (d1) -> (d1 + 1)(%i0)
+    affine.if (d0, d1) : (d1 - d0 >= 0) (%i0, %t0) {
+      %cf9 = addf %cf8, %cf8 : f32
+      store %cf9, %m[%i0] : memref<10xf32>
+
+    }
+  }
+
+  // CHECK: %0 = alloc() : memref<10xf32>
+  // CHECK-NEXT: %cst = constant 8.000000e+00 : f32
+  // CHECK-NEXT: affine.for %i0 = 0 to 10 {
+  // CHECK-NEXT:   %1 = affine.apply #map2(%i0)
+  // CHECK-NEXT:   affine.if #set0(%i0, %1) {
+  // CHECK-NEXT:     %2 = addf %cst, %cst : f32
+  // CHECK-NEXT:     store %2, %0[%i0] : memref<10xf32>
+  // CHECK-NEXT:   }
+  // CHECK-NEXT: }
+  // CHECK-NEXT: return
+
+  return
+}
+
+
+func @nested_loops_with_common_and_uncommon_invariant_code() {
+  %m = alloc() : memref<10xf32>
+  %cf7 = constant 7.0 : f32
+  %cf8 = constant 8.0 : f32
+
+  affine.for %i0 = 0 to 10 {
+    %v0 = addf %cf7, %cf8 : f32
+    affine.for %i1 = 0 to 10 {
+      %v1 = addf %cf7, %cf7 : f32
+      store %v0, %m[%i1] : memref<10xf32>
+      store %v0, %m[%i0] : memref<10xf32>
+    }
+  }
+
+  // CHECK: %0 = alloc() : memref<10xf32>
+  // CHECK-NEXT: %cst = constant 7.000000e+00 : f32
+  // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
+  // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
+  // CHECK-NEXT: %2 = addf %cst, %cst : f32
+  // CHECK-NEXT: affine.for %i0 = 0 to 10 {
+  // CHECK-NEXT:   store %1, %0[%i0] : memref<10xf32>
+  // CHECK-NEXT:   affine.for %i1 = 0 to 10 {
+  // CHECK-NEXT:     store %1, %0[%i1] : memref<10xf32>
+  // CHECK-NEXT:   }
+  // CHECK-NEXT: }
+  // CHECK-NEXT: return
+  return
+}
+
+func @invariant_affine_if() {
+  %m = alloc() : memref<10xf32>
+  %cf8 = constant 8.0 : f32
+  affine.for %i0 = 0 to 10 {
+    affine.for %i1 = 0 to 10 {
+      affine.if (d0, d1) : (d1 - d0 >= 0) (%i0, %i0) {
+        %cf9 = addf %cf8, %cf8 : f32
+        store %cf9, %m[%i0] : memref<10xf32>
+
+      }
+    }
+  }
+
+  // CHECK: %0 = alloc() : memref<10xf32>
+  // CHECK-NEXT: %cst = constant 8.000000e+00 : f32
+  // CHECK-NEXT: affine.for %i0 = 0 to 10 {
+  // CHECK-NEXT:   affine.if #set0(%i0, %i0) {
+  // CHECK-NEXT:     %1 = addf %cst, %cst : f32
+  // CHECK-NEXT:     store %1, %0[%i0] : memref<10xf32>
+  // CHECK-NEXT:   }
+  // CHECK-NEXT: }
+  // CHECK-NEXT: return
+
+  return
+}
+
+func @invariant_constant_and_load() {
+  %m = alloc() : memref<100xf32>
+  affine.for %i0 = 0 to 5 {
+    %c0 = constant 0 : index
+    %v = load %m[%c0] : memref<100xf32>
+    store %v, %m[%i0] : memref<100xf32>
+  }
+
+  // CHECK: %0 = alloc() : memref<100xf32>
+  // CHECK-NEXT: %c0 = constant 0 : index
+  // CHECK-NEXT: %1 = load %0[%c0] : memref<100xf32>
+  // CHECK-NEXT: affine.for %i0 = 0 to 5 {
+  // CHECK-NEXT:   store %1, %0[%i0] : memref<100xf32>
+  // CHECK-NEXT: }
+  // CHECK-NEXT: return
+
+  return
+}
+
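As the test file's RUN line shows, the newly registered flag is driven through mlir-opt. A minimal standalone invocation (input.mlir is a placeholder filename, not part of this change) would be:

    mlir-opt input.mlir -loop-invariant-code-motion

And as a sketch of the invariance criterion the pass implements: an op is hoisted when it is neither an affine.for nor a terminator and does not appear in the forward slice computed from the loop. In the illustration below (a hypothetical function in the same dialect syntax as the tests, not part of the commit), %v0 hoists above the loop, while the store, which uses %i0, stays in the body:

    func @licm_illustration() {
      %m = alloc() : memref<10xf32>
      %cf7 = constant 7.0 : f32
      affine.for %i0 = 0 to 10 {
        // Invariant: not in the forward slice of the loop; hoisted.
        %v0 = addf %cf7, %cf7 : f32
        // Loop-dependent: indexes with %i0; remains in the loop body.
        store %v0, %m[%i0] : memref<10xf32>
      }
      return
    }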

