diff options
| -rw-r--r-- | mlir/include/mlir/IR/AffineMap.h | 3 | ||||
| -rw-r--r-- | mlir/include/mlir/Linalg/IR/LinalgLibraryOps.td | 1 | ||||
| -rw-r--r-- | mlir/include/mlir/Linalg/Utils/Utils.h | 5 | ||||
| -rw-r--r-- | mlir/lib/IR/AffineExpr.cpp | 2 | ||||
| -rw-r--r-- | mlir/lib/IR/AffineMap.cpp | 9 | ||||
| -rw-r--r-- | mlir/lib/Linalg/Transforms/Tiling.cpp | 164 | ||||
| -rw-r--r-- | mlir/lib/Linalg/Utils/Utils.cpp | 19 | ||||
| -rw-r--r-- | mlir/test/Linalg/tile.mlir | 126 | ||||
| -rw-r--r-- | mlir/test/Linalg/tile_conv.mlir | 39 | 
9 files changed, 243 insertions, 125 deletions
diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index a705daaab0e..a29db18ceb5 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -134,6 +134,9 @@ public:    /// Returns true if the AffineMap represents a symbol-less permutation map.    bool isPermutation(); +  /// Returns the map consisting of the `resultPos` subset. +  AffineMap getSubMap(ArrayRef<unsigned> resultPos); +    friend ::llvm::hash_code hash_value(AffineMap arg);  private: diff --git a/mlir/include/mlir/Linalg/IR/LinalgLibraryOps.td b/mlir/include/mlir/Linalg/IR/LinalgLibraryOps.td index 23eed3e054b..9dda06e230f 100644 --- a/mlir/include/mlir/Linalg/IR/LinalgLibraryOps.td +++ b/mlir/include/mlir/Linalg/IR/LinalgLibraryOps.td @@ -202,7 +202,6 @@ def ConvOp : LinalgLibrary_Op<"conv", [NInputsAndOutputs<2, 1>]> {    // TODO(ntv) padding.    // Following the TF source of truth above, strides and dilations are integer    // attributes of the same rank as the number of window dimensions. -  // This convention    let arguments = (ins View:$filter, View:$input, View:$output,                     OptionalAttr<I64ArrayAttr>:$strides,                     OptionalAttr<I64ArrayAttr>:$dilations); diff --git a/mlir/include/mlir/Linalg/Utils/Utils.h b/mlir/include/mlir/Linalg/Utils/Utils.h index acb743081e2..1c611c420f4 100644 --- a/mlir/include/mlir/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Linalg/Utils/Utils.h @@ -23,6 +23,8 @@  #include "mlir/Support/LLVM.h"  namespace mlir { +class AffineExpr; +class AffineMap;  class OperationFolder;  namespace edsc { @@ -37,6 +39,7 @@ public:    /// *only* way to capture the loop induction variable.    LoopRangeBuilder(ValueHandle *iv, ValueHandle range);    LoopRangeBuilder(ValueHandle *iv, Value *range); +  LoopRangeBuilder(ValueHandle *iv, linalg::SubViewOp::Range range);    LoopRangeBuilder(const LoopRangeBuilder &) = delete;    LoopRangeBuilder(LoopRangeBuilder &&) = default; @@ -59,6 +62,8 @@ public:                         llvm::ArrayRef<edsc::ValueHandle> ranges);    LoopNestRangeBuilder(llvm::ArrayRef<edsc::ValueHandle *> ivs,                         llvm::ArrayRef<Value *> ranges); +  LoopNestRangeBuilder(llvm::ArrayRef<edsc::ValueHandle *> ivs, +                       llvm::ArrayRef<linalg::SubViewOp::Range> ranges);    edsc::ValueHandle operator()(std::function<void(void)> fun = nullptr);  private: diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index 03dd4b81c45..55c3ebdacd9 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -559,7 +559,7 @@ AffineExpr AffineExpr::compose(AffineMap map) const {                                               map.getResults().end());    return replaceDimsAndSymbols(dimReplacements, {});  } -raw_ostream &operator<<(raw_ostream &os, AffineExpr &expr) { +raw_ostream &mlir::operator<<(raw_ostream &os, AffineExpr &expr) {    expr.print(os);    return os;  } diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index c0810891173..9adf1dfcecb 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -259,6 +259,15 @@ bool AffineMap::isPermutation() {    return isProjectedPermutation();  } +AffineMap AffineMap::getSubMap(ArrayRef<unsigned> resultPos) { +  SmallVector<AffineExpr, 4> exprs; +  exprs.reserve(resultPos.size()); +  for (auto idx : resultPos) { +    exprs.push_back(getResult(idx)); +  } +  return AffineMap::get(getNumDims(), getNumSymbols(), exprs); +} +  AffineMap mlir::simplifyAffineMap(AffineMap map) {    SmallVector<AffineExpr, 8> exprs;    for (auto e : map.getResults()) { diff --git a/mlir/lib/Linalg/Transforms/Tiling.cpp b/mlir/lib/Linalg/Transforms/Tiling.cpp index c2ad3e53fc2..6fdf5896171 100644 --- a/mlir/lib/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Linalg/Transforms/Tiling.cpp @@ -21,6 +21,7 @@  #include "mlir/EDSC/Helpers.h"  #include "mlir/IR/AffineExpr.h" +#include "mlir/IR/AffineExprVisitor.h"  #include "mlir/IR/AffineMap.h"  #include "mlir/IR/OpImplementation.h"  #include "mlir/Linalg/IR/LinalgOps.h" @@ -66,7 +67,7 @@ static bool isZero(Value *v) {  // avoiding affine map manipulations.  // The returned ranges correspond to the loop ranges, in the proper order, that  // are tiled and for which new loops will be created. -static SmallVector<Value *, 4> +static SmallVector<SubViewOp::Range, 4>  makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map,                      ArrayRef<Value *> allViewSizes,                      ArrayRef<Value *> allTileSizes, OperationFolder &folder) { @@ -84,11 +85,10 @@ makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map,    }    // Create a new range with the applied tile sizes. -  SmallVector<Value *, 4> res; +  SmallVector<SubViewOp::Range, 4> res;    for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) { -    res.push_back(b.create<RangeOp>(loc, -                                    folder.create<ConstantIndexOp>(b, loc, 0), -                                    viewSizes[idx], tileSizes[idx])); +    res.push_back(SubViewOp::Range{constant_index(folder, 0), viewSizes[idx], +                                   tileSizes[idx]});    }    return res;  } @@ -105,26 +105,57 @@ static unsigned getPosInDomain(LinalgOp op, unsigned viewIndex, unsigned dim) {    return map.getResult(dim).cast<AffineDimExpr>().getPosition();  } -static bool isTiledView(LinalgOp linalgOp, unsigned viewIndex, -                        ArrayRef<Value *> tileSizes) { -  auto viewIteratorBegin = linalgOp.getInputsAndOutputs().begin(); -  Value *view = *(viewIteratorBegin + viewIndex); -  unsigned viewRank = view->getType().cast<ViewType>().getRank(); -  for (unsigned r = 0; r < viewRank; ++r) { -    // Loop position for the range dimension. -    auto pos = getPosInDomain(linalgOp, viewIndex, r); -    auto tileSize = tileSizes[pos]; -    if (!isZero(tileSize)) -      return true; +namespace { +// Helper visitor to determine whether an AffineExpr is tiled. +// This is achieved by traversing every AffineDimExpr with position `pos` and +// checking whether the corresponding `tileSizes[pos]` is non-zero. +// This also enforces only positive coefficients occur in multiplications. +// +// Example: +//   `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0] +// +struct TileCheck : public AffineExprVisitor<TileCheck> { +  TileCheck(ArrayRef<Value *> tileSizes) +      : isTiled(false), tileSizes(tileSizes) {} + +  void visitDimExpr(AffineDimExpr expr) { +    isTiled |= !isZero(tileSizes[expr.getPosition()]); +  } +  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) { +    visit(expr.getLHS()); +    visit(expr.getRHS()); +    if (expr.getKind() == mlir::AffineExprKind::Mul) +      assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 && +             "nonpositive multipliying coefficient");    } +  bool isTiled; +  ArrayRef<Value *> tileSizes; +}; +} // namespace + +static bool isTiled(AffineExpr expr, ArrayRef<Value *> tileSizes) { +  if (!expr) +    return false; +  TileCheck t(tileSizes); +  t.visit(expr); +  return t.isTiled; +} + +// Checks whether the view with index `viewIndex` within `linalgOp` varies with +// respect to a non-zero `tileSize`. +static bool isTiled(AffineMap map, ArrayRef<Value *> tileSizes) { +  if (!map) +    return false; +  for (unsigned r = 0; r < map.getNumResults(); ++r) +    if (isTiled(map.getResult(r), tileSizes)) +      return true;    return false;  } -static SmallVector<Value *, 4> makeTiledViews(OpBuilder &b, Location loc, -                                              LinalgOp linalgOp, -                                              ArrayRef<Value *> ivs, -                                              ArrayRef<Value *> tileSizes, -                                              OperationFolder &folder) { +static SmallVector<Value *, 4> +makeTiledViews(OpBuilder &b, Location loc, LinalgOp linalgOp, +               ArrayRef<Value *> ivs, ArrayRef<Value *> tileSizes, +               ArrayRef<Value *> viewSizes, OperationFolder &folder) {    assert(ivs.size() == static_cast<size_t>(llvm::count_if(                             llvm::make_range(tileSizes.begin(), tileSizes.end()),                             [](Value *v) { return !isZero(v); })) && @@ -134,6 +165,20 @@ static SmallVector<Value *, 4> makeTiledViews(OpBuilder &b, Location loc,    using edsc::op::operator+;    using edsc::op::operator<; +  // Construct (potentially temporary) mins and maxes on which to apply maps +  // that define tile subviews. +  SmallVector<Value *, 8> mins, maxes; +  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) { +    if (isZero(tileSizes[idx])) { +      mins.push_back(constant_index(folder, 0)); +      maxes.push_back(viewSizes[idx]); +    } else { +      ValueHandle lb(ivs[idxIvs++]), step(tileSizes[idx]); +      mins.push_back(lb); +      maxes.push_back(lb + step); +    } +  } +    auto *op = linalgOp.getOperation();    SmallVector<Value *, 4> res; @@ -143,48 +188,41 @@ static SmallVector<Value *, 4> makeTiledViews(OpBuilder &b, Location loc,         ++viewIndex) {      Value *view = *(viewIteratorBegin + viewIndex);      unsigned viewRank = view->getType().cast<ViewType>().getRank(); -    // Early exit in the untiled case. -    if (!isTiledView(linalgOp, viewIndex, tileSizes)) { +    auto map = loopToOperandRangesMaps(linalgOp)[viewIndex]; +    // If the view is not tiled, we can use it as is. +    if (!isTiled(map, tileSizes)) {        res.push_back(view);        continue;      } -    // If not a scalar, then construct a new subview for the tile. +    // Construct a new subview for the tile.      SmallVector<SubViewOp::Range, 4> subViewOperands;      subViewOperands.reserve(viewRank * 3);      for (unsigned r = 0; r < viewRank; ++r) { -      // Loop position for the range dimension. -      auto pos = getPosInDomain(linalgOp, viewIndex, r); -      auto tileSize = tileSizes[pos]; -      if (isZero(tileSize)) { -        subViewOperands.push_back( -            SubViewOp::Range{folder.create<ConstantIndexOp>(b, loc, 0), -                             linalg::intrinsics::dim(view, r), -                             folder.create<ConstantIndexOp>(b, loc, 1)}); +      if (!isTiled(map.getSubMap({r}), tileSizes)) { +        subViewOperands.push_back(SubViewOp::Range{ +            constant_index(folder, 0), linalg::intrinsics::dim(view, r), +            constant_index(folder, 1)});          continue;        } -      // `tileSizes` of `0` don't have an induction variable counterpart. So -      // we count the number of zeros to align the index in `ivs` to pos. -      auto count = llvm::count_if( -          llvm::make_range(tileSizes.begin(), tileSizes.begin() + pos), -          [](Value *v) { return isZero(v); }); -      auto iv = ivs[pos - count]; - -      ScopedContext scope(b, loc); -      // TODO(ntv): lb = iv is a poor man's folding of max(0, i) == i which is -      // generally wrong but correct in the specific case of tiling linalg ops. -      // Tie this loose end in the future. -      ValueHandle lb(iv); -      ValueHandle step(tileSize); -      ValueHandle steppedLb = lb + step; +      auto m = map.getSubMap({r}); +      auto *min = applyMapToValues(b, loc, m, mins, folder).front(); +      auto *max = applyMapToValues(b, loc, m, maxes, folder).front();        // Tiling creates a new slice at the proper index, the slice step is 1        // (i.e. the slice view does not subsample, stepping occurs in the loop). -      subViewOperands.push_back(SubViewOp::Range{ -          iv, steppedLb, folder.create<ConstantIndexOp>(b, loc, 1)}); +      subViewOperands.push_back( +          SubViewOp::Range{min, max, constant_index(folder, 1)});      }      res.push_back(b.create<SubViewOp>(loc, view, subViewOperands));    } + +  // Traverse the mins/maxes and erase those that don't have uses left. +  mins.append(maxes.begin(), maxes.end()); +  for (auto *v : mins) +    if (v->use_empty()) +      v->getDefiningOp()->erase(); +    return res;  } @@ -340,12 +378,14 @@ mlir::linalg::tileLinalgOp(LinalgOp op, ArrayRef<Value *> tileSizes,    OpBuilder builder(op.getOperation());    ScopedContext scope(builder, op.getLoc());    // 2. Build the tiled loop ranges. -  auto loopRanges = makeTiledLoopRanges( -      scope.getBuilder(), scope.getLocation(), -      // The flattened loopToOperandRangesMaps is expected to be an invertible -      // permutation map (which is asserted in the inverse calculation). -      inversePermutation(concatAffineMaps(loopToOperandRangesMaps(op))), -      getViewSizes(op), tileSizes, folder); +  auto viewSizes = getViewSizes(op); +  // The flattened loopToOperandRangesMaps is expected to be an invertible +  // permutation map (asserted in the inverse calculation). +  auto viewSizesToLoopsMap = +      inversePermutation(concatAffineMaps(loopToOperandRangesMaps(op))); +  auto loopRanges = +      makeTiledLoopRanges(scope.getBuilder(), scope.getLocation(), +                          viewSizesToLoopsMap, viewSizes, tileSizes, folder);    // 3. Create the tiled loops.    LinalgOp res = op; @@ -355,10 +395,10 @@ mlir::linalg::tileLinalgOp(LinalgOp op, ArrayRef<Value *> tileSizes,      auto b = ScopedContext::getBuilder();      auto loc = ScopedContext::getLocation();      SmallVector<Value *, 4> ivValues(ivs.begin(), ivs.end()); -    // If/when the assertion below becomes false, we will have to templatize -    // `makeTiledViews`. +    // If/when the assertion below becomes false, templatize `makeTiledViews`.      assert(op.getNumInputsAndOutputs() == op.getOperation()->getNumOperands()); -    auto views = makeTiledViews(b, loc, op, ivValues, tileSizes, folder); +    auto views = +        makeTiledViews(b, loc, op, ivValues, tileSizes, viewSizes, folder);      // If no promotion, we are done.      auto promote = !viewsToPromote.empty() && @@ -412,7 +452,7 @@ mlir::linalg::tileLinalgOp(LinalgOp op, ArrayRef<Value *> tileSizes,        buffer_dealloc(pi.buffer);    }); -  // 7. Gather the newly created loops and return them with the new op. +  // 8. Gather the newly created loops and return them with the new op.    SmallVector<ForOp, 8> loops;    loops.reserve(ivs.size());    for (auto iv : ivs) @@ -469,6 +509,12 @@ static void tileLinalgOps(Function f, ArrayRef<int64_t> tileSizes,      if (opLoopsPair)        op.erase();    }); +  f.walk<LinalgOp>([](LinalgOp op) { +    if (!op.getOperation()->hasNoSideEffect()) +      return; +    if (op.getOperation()->use_empty()) +      op.erase(); +  });  }  namespace { @@ -499,8 +545,6 @@ LinalgTilingPass::LinalgTilingPass(ArrayRef<int64_t> sizes, bool promoteViews) {  LinalgTilingPassCLI::LinalgTilingPassCLI() : LinalgTilingPass() {    this->tileSizes.assign(clTileSizes.begin(), clTileSizes.end());    this->promoteViews = clPromoteFullTileViews; -  llvm::errs() << "\nAAAA: " << this->promoteViews << " " -               << clPromoteFullTileViews;  }  FunctionPassBase * diff --git a/mlir/lib/Linalg/Utils/Utils.cpp b/mlir/lib/Linalg/Utils/Utils.cpp index f00e7d8206c..da92e60fb5e 100644 --- a/mlir/lib/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Linalg/Utils/Utils.cpp @@ -53,6 +53,15 @@ mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(ValueHandle *iv,    enter(body, /*prev=*/1);  } +mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(ValueHandle *iv, +                                               SubViewOp::Range range) { +  auto forOp = OperationHandle::createOp<linalg::ForOp>(range.min, range.max, +                                                        range.step); +  *iv = ValueHandle(forOp.getInductionVar()); +  auto *body = forOp.getBody(); +  enter(body, /*prev=*/1); +} +  ValueHandle  mlir::edsc::LoopRangeBuilder::operator()(std::function<void(void)> fun) {    if (fun) @@ -62,6 +71,15 @@ mlir::edsc::LoopRangeBuilder::operator()(std::function<void(void)> fun) {  }  mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder( +    ArrayRef<ValueHandle *> ivs, ArrayRef<SubViewOp::Range> ranges) { +  loops.reserve(ranges.size()); +  for (unsigned i = 0, e = ranges.size(); i < e; ++i) { +    loops.emplace_back(ivs[i], ranges[i]); +  } +  assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); +} + +mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder(      ArrayRef<ValueHandle *> ivs, ArrayRef<ValueHandle> ranges) {    loops.reserve(ranges.size());    for (unsigned i = 0, e = ranges.size(); i < e; ++i) { @@ -101,6 +119,7 @@ static Value *emitOrFoldComposedAffineApply(OpBuilder &b, Location loc,                                              OperationFolder &state) {    SmallVector<Value *, 4> operands(operandsRef.begin(), operandsRef.end());    fullyComposeAffineMapAndOperands(&map, &operands); +  canonicalizeMapAndOperands(&map, &operands);    return state.create<AffineApplyOp>(b, loc, map, operands);  } diff --git a/mlir/test/Linalg/tile.mlir b/mlir/test/Linalg/tile.mlir index 27f5b705b82..c996ada46a7 100644 --- a/mlir/test/Linalg/tile.mlir +++ b/mlir/test/Linalg/tile.mlir @@ -17,54 +17,54 @@ func @matmul(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?x?xf32>, %arg2:  // TILE-2-LABEL: func @matmul(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?x?xf32>, %arg2: !linalg.view<?x?xf32>) {  //       TILE-2: %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32>  //       TILE-2: linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 { -//  TILE-2-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-2-NEXT:   %[[K:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32> -//  TILE-2-NEXT:   %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1, %c0, %[[K]], %c1] : !linalg.view<?x?xf32> -//  TILE-2-NEXT:   %[[c:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-2-NEXT:   %[[N:.*]] = linalg.dim %arg2, 1 : !linalg.view<?x?xf32> -//  TILE-2-NEXT:   %[[sCi:.*]] = linalg.subview %arg2[%i0, %[[c]], %c1, %c0, %[[N]], %c1] : !linalg.view<?x?xf32> -//  TILE-2-NEXT:   linalg.matmul(%[[sAi]], %arg1, %[[sCi]]) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32> +//       TILE-2:   %[[a:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-2:   %[[K:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32> +//       TILE-2:   %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1, %c0, %[[K]], %c1] : !linalg.view<?x?xf32> +//       TILE-2:   %[[c:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-2:   %[[N:.*]] = linalg.dim %arg2, 1 : !linalg.view<?x?xf32> +//       TILE-2:   %[[sCi:.*]] = linalg.subview %arg2[%i0, %[[c]], %c1, %c0, %[[N]], %c1] : !linalg.view<?x?xf32> +//       TILE-2:   linalg.matmul(%[[sAi]], %arg1, %[[sCi]]) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32>  // TILE-02-LABEL: func @matmul(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?x?xf32>, %arg2: !linalg.view<?x?xf32>) {  //       TILE-02: %[[N:.*]] = linalg.dim %arg1, 1 : !linalg.view<?x?xf32>  //       TILE-02: linalg.for %i0 = %c0 to %[[N]] step %c2 { -//  TILE-02-NEXT:   %[[K:.*]] = linalg.dim %arg1, 0 : !linalg.view<?x?xf32> -//  TILE-02-NEXT:   %[[b:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-02-NEXT:   %[[sBj:.*]] = linalg.subview %arg1[%c0, %[[K]], %c1, %i0, %[[b]], %c1] : !linalg.view<?x?xf32> -//  TILE-02-NEXT:   %[[M:.*]] = linalg.dim %arg2, 0 : !linalg.view<?x?xf32> -//  TILE-02-NEXT:   %[[c:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-02-NEXT:   %[[sCj:.*]] = linalg.subview %arg2[%c0, %[[M]], %c1, %i0, %[[c]], %c1] : !linalg.view<?x?xf32> -//  TILE-02-NEXT:   linalg.matmul(%arg0, %[[sBj]], %[[sCj]]) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32> +//       TILE-02:   %[[K:.*]] = linalg.dim %arg1, 0 : !linalg.view<?x?xf32> +//       TILE-02:   %[[b:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-02:   %[[sBj:.*]] = linalg.subview %arg1[%c0, %[[K]], %c1, %i0, %[[b]], %c1] : !linalg.view<?x?xf32> +//       TILE-02:   %[[M:.*]] = linalg.dim %arg2, 0 : !linalg.view<?x?xf32> +//       TILE-02:   %[[c:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-02:   %[[sCj:.*]] = linalg.subview %arg2[%c0, %[[M]], %c1, %i0, %[[c]], %c1] : !linalg.view<?x?xf32> +//       TILE-02:   linalg.matmul(%arg0, %[[sBj]], %[[sCj]]) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32>  // TILE-002-LABEL: func @matmul(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?x?xf32>, %arg2: !linalg.view<?x?xf32>) {  //       TILE-002: %[[K:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32>  //       TILE-002: linalg.for %i0 = %c0{{.*}} to %[[K]] step %c2 { -//  TILE-002-NEXT:   %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32> -//  TILE-002-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-002-NEXT:   %[[sAj:.*]] = linalg.subview %arg0[%c0, %[[M]], %c1, %i0, %[[a]], %c1] : !linalg.view<?x?xf32> -//  TILE-002-NEXT:   %[[b:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-002-NEXT:   %[[N:.*]] = linalg.dim %arg1, 1 : !linalg.view<?x?xf32> -//  TILE-002-NEXT:   %[[sBj:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1, %c0, %[[N]], %c1] : !linalg.view<?x?xf32> -//  TILE-002-NEXT:   linalg.matmul(%[[sAj]], %[[sBj]], %arg2) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32> +//       TILE-002:   %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32> +//       TILE-002:   %[[a:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-002:   %[[sAj:.*]] = linalg.subview %arg0[%c0, %[[M]], %c1, %i0, %[[a]], %c1] : !linalg.view<?x?xf32> +//       TILE-002:   %[[b:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-002:   %[[N:.*]] = linalg.dim %arg1, 1 : !linalg.view<?x?xf32> +//       TILE-002:   %[[sBj:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1, %c0, %[[N]], %c1] : !linalg.view<?x?xf32> +//       TILE-002:   linalg.matmul(%[[sAj]], %[[sBj]], %arg2) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32>  // TILE-234-LABEL: func @matmul(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?x?xf32>, %arg2: !linalg.view<?x?xf32>) {  //       TILE-234: %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32>  //       TILE-234: %[[K:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32>  //       TILE-234: %[[N:.*]] = linalg.dim %arg1, 1 : !linalg.view<?x?xf32>  //       TILE-234:  linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 { -//  TILE-234-NEXT:    linalg.for %i1 = %c0{{.*}} to %[[N]] step %c3 { -//  TILE-234-NEXT:      linalg.for %i2 = %c0{{.*}} to %[[K]] step %c4 { -//  TILE-234-NEXT:        %[[ai:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-234-NEXT:        %[[ak:.*]] = affine.apply #[[UB2]](%i2) -//  TILE-234-NEXT:        %[[sAik:.*]] = linalg.subview %arg0[%i0, %[[ai]], %c1, %i2, %[[ak]], %c1] : !linalg.view<?x?xf32> -//  TILE-234-NEXT:        %[[bk:.*]] = affine.apply #[[UB2]](%i2) -//  TILE-234-NEXT:        %[[bj:.*]] = affine.apply #[[UB1]](%i1) -//  TILE-234-NEXT:        %[[sBkj:.*]] = linalg.subview %arg1[%i2, %[[bk]], %c1, %i1, %[[bj]], %c1] : !linalg.view<?x?xf32> -//  TILE-234-NEXT:        %[[ci:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-234-NEXT:        %[[cj:.*]] = affine.apply #[[UB1]](%i1) -//  TILE-234-NEXT:        %[[sCij:.*]] = linalg.subview %arg2[%i0, %[[ci]], %c1, %i1, %[[cj]], %c1] : !linalg.view<?x?xf32> +//       TILE-234:    linalg.for %i1 = %c0{{.*}} to %[[N]] step %c3 { +//       TILE-234:      linalg.for %i2 = %c0{{.*}} to %[[K]] step %c4 { +//       TILE-234:        %[[ai:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-234:        %[[ak:.*]] = affine.apply #[[UB2]](%i2) +//       TILE-234:        %[[sAik:.*]] = linalg.subview %arg0[%i0, %[[ai]], %c1, %i2, %[[ak]], %c1] : !linalg.view<?x?xf32> +//       TILE-234:        %[[bk:.*]] = affine.apply #[[UB2]](%i2) +//       TILE-234:        %[[bj:.*]] = affine.apply #[[UB1]](%i1) +//       TILE-234:        %[[sBkj:.*]] = linalg.subview %arg1[%i2, %[[bk]], %c1, %i1, %[[bj]], %c1] : !linalg.view<?x?xf32> +//       TILE-234:        %[[ci:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-234:        %[[cj:.*]] = affine.apply #[[UB1]](%i1) +//       TILE-234:        %[[sCij:.*]] = linalg.subview %arg2[%i0, %[[ci]], %c1, %i1, %[[cj]], %c1] : !linalg.view<?x?xf32>  // -//  TILE-234-NEXT:        linalg.matmul(%[[sAik]], %[[sBkj]], %[[sCij]]) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32> +//       TILE-234:        linalg.matmul(%[[sAik]], %[[sBkj]], %[[sCij]]) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32>  func @matvec(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<?xf32>) {    linalg.matvec(%arg0, %arg1, %arg2) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32> @@ -73,21 +73,21 @@ func @matvec(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !l  // TILE-2-LABEL: func @matvec(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<?xf32>) {  //       TILE-2: %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32>  //       TILE-2: linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 { -//  TILE-2-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-2-NEXT:   %[[N:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32> -//  TILE-2-NEXT:   %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1, %c0, %[[N]], %c1] : !linalg.view<?x?xf32> -//  TILE-2-NEXT:   %[[c:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-2-NEXT:   %[[sCi:.*]] = linalg.subview %arg2[%i0, %[[c]], %c1] : !linalg.view<?xf32> -//  TILE-2-NEXT:   linalg.matvec(%[[sAi]], %arg1, %[[sCi]]) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32> +//       TILE-2:   %[[a:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-2:   %[[N:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32> +//       TILE-2:   %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1, %c0, %[[N]], %c1] : !linalg.view<?x?xf32> +//       TILE-2:   %[[c:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-2:   %[[sCi:.*]] = linalg.subview %arg2[%i0, %[[c]], %c1] : !linalg.view<?xf32> +//       TILE-2:   linalg.matvec(%[[sAi]], %arg1, %[[sCi]]) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32>  // TILE-02-LABEL: func @matvec(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<?xf32>) {  //       TILE-02: %[[K:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32>  //       TILE-02: linalg.for %i0 = %c0{{.*}} to %[[K]] step %c2 { -//  TILE-02-NEXT:   %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32> -//  TILE-02-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-02-NEXT:   %[[sAj:.*]] = linalg.subview %arg0[%c0, %[[M]], %c1, %i0, %[[a]], %c1] : !linalg.view<?x?xf32> -//  TILE-02-NEXT:   %[[b:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-02-NEXT:   %[[sBj:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1] : !linalg.view<?xf32> +//       TILE-02:   %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32> +//       TILE-02:   %[[a:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-02:   %[[sAj:.*]] = linalg.subview %arg0[%c0, %[[M]], %c1, %i0, %[[a]], %c1] : !linalg.view<?x?xf32> +//       TILE-02:   %[[b:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-02:   %[[sBj:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1] : !linalg.view<?xf32>  //       TILE-02:   linalg.matvec(%[[sAj]], %[[sBj]], %arg2) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32>  // TILE-002-LABEL: func @matvec(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<?xf32>) { @@ -97,16 +97,16 @@ func @matvec(%arg0: !linalg.view<?x?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !l  //       TILE-234: %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?xf32>  //       TILE-234: %[[K:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?xf32>  //       TILE-234:  linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 { -//  TILE-234-NEXT:    linalg.for %i1 = %c0{{.*}} to %[[K]] step %c3 { -//  TILE-234-NEXT:      %[[ai:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-234-NEXT:      %[[aj:.*]] = affine.apply #[[UB1]](%i1) -//  TILE-234-NEXT:      %[[sAij:.*]] = linalg.subview %arg0[%i0, %[[ai]], %c1, %i1, %[[aj]], %c1] : !linalg.view<?x?xf32> -//  TILE-234-NEXT:      %[[bj:.*]] = affine.apply #[[UB1]](%i1) -//  TILE-234-NEXT:      %[[sBj:.*]] = linalg.subview %arg1[%i1, %[[bj]], %c1] : !linalg.view<?xf32> -//  TILE-234-NEXT:      %[[ci:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-234-NEXT:      %[[sCi:.*]] = linalg.subview %arg2[%i0, %[[ci]], %c1] : !linalg.view<?xf32> +//       TILE-234:    linalg.for %i1 = %c0{{.*}} to %[[K]] step %c3 { +//       TILE-234:      %[[ai:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-234:      %[[aj:.*]] = affine.apply #[[UB1]](%i1) +//       TILE-234:      %[[sAij:.*]] = linalg.subview %arg0[%i0, %[[ai]], %c1, %i1, %[[aj]], %c1] : !linalg.view<?x?xf32> +//       TILE-234:      %[[bj:.*]] = affine.apply #[[UB1]](%i1) +//       TILE-234:      %[[sBj:.*]] = linalg.subview %arg1[%i1, %[[bj]], %c1] : !linalg.view<?xf32> +//       TILE-234:      %[[ci:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-234:      %[[sCi:.*]] = linalg.subview %arg2[%i0, %[[ci]], %c1] : !linalg.view<?xf32>  // -//  TILE-234-NEXT:      linalg.matvec(%[[sAij]], %[[sBj]], %[[sCi]]) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32> +//       TILE-234:      linalg.matvec(%[[sAij]], %[[sBj]], %[[sCi]]) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32>  func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {    linalg.dot(%arg0, %arg1, %arg2) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32> @@ -115,11 +115,11 @@ func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg  // TILE-2-LABEL: func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {  //       TILE-2: %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?xf32>  //       TILE-2: linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 { -//  TILE-2-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-2-NEXT:   %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1] : !linalg.view<?xf32> -//  TILE-2-NEXT:   %[[b:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-2-NEXT:   %[[sBi:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1] : !linalg.view<?xf32> -//  TILE-2-NEXT:   linalg.dot(%[[sAi]], %[[sBi]], {{.*}}) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32> +//       TILE-2:   %[[a:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-2:   %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1] : !linalg.view<?xf32> +//       TILE-2:   %[[b:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-2:   %[[sBi:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1] : !linalg.view<?xf32> +//       TILE-2:   linalg.dot(%[[sAi]], %[[sBi]], {{.*}}) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32>  // TILE-02-LABEL: func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {  //   TILE-02-NOT: linalg.for @@ -130,8 +130,8 @@ func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg  // TILE-234-LABEL: func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {  //       TILE-234: %[[K:.*]] = linalg.dim %arg0, 0 : !linalg.view<?xf32>  //       TILE-234:  linalg.for %i0 = %c0{{.*}} to %[[K]] step %c2 { -//  TILE-234-NEXT:    %[[a:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-234-NEXT:    %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1] : !linalg.view<?xf32> -//  TILE-234-NEXT:    %[[b:.*]] = affine.apply #[[UB0]](%i0) -//  TILE-234-NEXT:    %[[sBi:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1] : !linalg.view<?xf32> -//  TILE-234-NEXT:    linalg.dot(%[[sAi]], %[[sBi]], %arg2) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32> +//       TILE-234:    %[[a:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-234:    %[[sAi:.*]] = linalg.subview %arg0[%i0, %[[a]], %c1] : !linalg.view<?xf32> +//       TILE-234:    %[[b:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-234:    %[[sBi:.*]] = linalg.subview %arg1[%i0, %[[b]], %c1] : !linalg.view<?xf32> +//       TILE-234:    linalg.dot(%[[sAi]], %[[sBi]], %arg2) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32> diff --git a/mlir/test/Linalg/tile_conv.mlir b/mlir/test/Linalg/tile_conv.mlir new file mode 100644 index 00000000000..5055cd3b8a1 --- /dev/null +++ b/mlir/test/Linalg/tile_conv.mlir @@ -0,0 +1,39 @@ +// RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,0,0,4 | FileCheck %s -check-prefix=TILE-23004 + +// TILE-23004-DAG: #[[UB0:.*]] = (d0) -> (d0 + 2) +// TILE-23004-DAG: #[[UB1:.*]] = (d0) -> (d0 + 3) +// TILE-23004-DAG: #[[UB2:.*]] = (d0) -> (d0 + 4) +// TILE-23004-DAG: #[[D0x30pS0x10:.*]] = (d0)[s0] -> (d0 * 30 + s0 * 10) +// TILE-23004-DAG: #[[D0x30pS0x10p90:.*]] = (d0)[s0] -> (d0 * 30 + s0 * 10 + 90) +func @conv(%arg0: !linalg.view<?x?x?x?xf32>, %arg1: !linalg.view<?x?x?x?xf32>, %arg2: !linalg.view<?x?x?x?xf32>) { +  linalg.conv(%arg0, %arg1, %arg2) {dilations = [10, 20], strides = [30, 40]} : !linalg.view<?x?x?x?xf32>, !linalg.view<?x?x?x?xf32>, !linalg.view<?x?x?x?xf32> +  return +} +// TILE-23004-LABEL: func @conv(%arg0: !linalg.view<?x?x?x?xf32>, %arg1: !linalg.view<?x?x?x?xf32>, %arg2: !linalg.view<?x?x?x?xf32>) { +//       TILE-23004:  %[[Q:.*]] = linalg.dim %arg0, 2 : !linalg.view<?x?x?x?xf32> +//       TILE-23004:  %[[B:.*]] = linalg.dim %arg1, 0 : !linalg.view<?x?x?x?xf32> +//       TILE-23004: %[[PaddedInput0:.*]] = linalg.dim %arg1, 1 : !linalg.view<?x?x?x?xf32> +//       TILE-23004: %[[X0:.*]] = linalg.dim %arg2, 1 : !linalg.view<?x?x?x?xf32> +//       TILE-23004: linalg.for %i0 = %c0 to %[[B]] step %c2 { +//       TILE-23004:   linalg.for %i1 = %c0 to %[[X0]] step %c3 { +//       TILE-23004:     linalg.for %i2 = %c0 to %[[Q]] step %c4 { +//       TILE-23004:       %[[Z0:.*]] = linalg.dim %arg0, 0 : !linalg.view<?x?x?x?xf32> +//       TILE-23004:       %[[Z1:.*]] = linalg.dim %arg0, 1 : !linalg.view<?x?x?x?xf32> +//       TILE-23004:       %[[I2p4:.*]] = affine.apply #[[UB2]](%i2) +//       TILE-23004:       %[[K:.*]] = linalg.dim %arg0, 3 : !linalg.view<?x?x?x?xf32> +//       TILE-23004:       %[[FilterView:.*]] = linalg.subview %arg0[%c0, %[[Z0]], %c1, %c0, %[[Z1]], %c1, %i2, %[[I2p4]], %c1, %c0, %[[K]], %c1] : !linalg.view<?x?x?x?xf32> +// +//       TILE-23004:       %[[I0p3:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-23004:       %[[I1:.*]] = affine.apply #[[D0x30pS0x10]](%i1)[%c0] +//       TILE-23004:       %[[I1pStep:.*]] = affine.apply #[[D0x30pS0x10p90]](%i1)[%[[PaddedInput0]]] +//       TILE-23004:       %[[SZ2:.*]] = linalg.dim %arg1, 2 : !linalg.view<?x?x?x?xf32> +//       TILE-23004:       %[[I2p2:.*]] = affine.apply #[[UB2]](%i2) +//       TILE-23004:       %[[InputView:.*]] = linalg.subview %arg1[%i0, %[[I0p3]], %c1, %[[I1]], %[[I1pStep]], %c1, %c0, %[[SZ2]], %c1, %i2, %[[I2p2]], %c1] : !linalg.view<?x?x?x?xf32> +// +//       TILE-23004:       %[[B:.*]] = affine.apply #[[UB0]](%i0) +//       TILE-23004:       %[[I1p3:.*]] = affine.apply #[[UB1]](%i1) +//       TILE-23004:       %[[X0:.*]] = linalg.dim %arg2, 2 : !linalg.view<?x?x?x?xf32> +//       TILE-23004:       %[[X1:.*]] = linalg.dim %arg2, 3 : !linalg.view<?x?x?x?xf32> +//       TILE-23004:       %[[OutputView:.*]] = linalg.subview %arg2[%i0, %[[B]], %c1, %i1, %[[I1p3]], %c1, %c0, %[[X0]], %c1, %c0, %[[X1]], %c1] : !linalg.view<?x?x?x?xf32> +// +//       TILE-23004:       linalg.conv(%[[FilterView]], %[[InputView]], %[[OutputView]]) : !linalg.view<?x?x?x?xf32>, !linalg.view<?x?x?x?xf32>, !linalg.view<?x?x?x?xf32>  | 

