diff options
| author | Andy Davis <andydavis@google.com> | 2019-04-04 10:42:45 -0700 |
|---|---|---|
| committer | Mehdi Amini <joker.eph@gmail.com> | 2019-04-05 07:41:12 -0700 |
| commit | d0d1b2a30deec0b8d00b5bc64ae1f465a2b8b8a6 (patch) | |
| tree | 5346ebe1fae9c967c8bc3bf40f0fb77076543759 /mlir | |
| parent | 55014813e33a3ce8757e899dc9d39f5d394798ec (diff) | |
| download | bcm5719-llvm-d0d1b2a30deec0b8d00b5bc64ae1f465a2b8b8a6.tar.gz bcm5719-llvm-d0d1b2a30deec0b8d00b5bc64ae1f465a2b8b8a6.zip | |
Fix bug in LoopTiling where creation of tile-space loop upper bound did not handle symbol operands correctly.
--
PiperOrigin-RevId: 241958502
Diffstat (limited to 'mlir')
| -rw-r--r-- | mlir/lib/Transforms/LoopTiling.cpp | 22 | ||||
| -rw-r--r-- | mlir/test/Transforms/loop-tiling.mlir | 79 |
2 files changed, 90 insertions, 11 deletions
diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp index d262a5d14aa..956d50ec26f 100644 --- a/mlir/lib/Transforms/LoopTiling.cpp +++ b/mlir/lib/Transforms/LoopTiling.cpp @@ -148,20 +148,30 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops, // Construct the upper bound map; the operands are the original operands // with 'i' (tile-space loop) appended to it. The new upper bound map is // the original one with an additional expression i + tileSize appended. - SmallVector<Value *, 4> ubOperands(origLoops[i].getUpperBoundOperands()); + auto ub = origLoops[i].getUpperBound(); + SmallVector<Value *, 4> ubOperands; + ubOperands.reserve(ub.getNumOperands() + 1); + auto origUbMap = ub.getMap(); + // Add dim operands from original upper bound. + for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j) { + ubOperands.push_back(ub.getOperand(j)); + } + // Add dim operand for new loop upper bound. ubOperands.push_back(newLoops[i].getInductionVar()); - - auto origUbMap = origLoops[i].getUpperBoundMap(); + // Add symbol operands from original upper bound. + for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j) { + ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j)); + } SmallVector<AffineExpr, 4> boundExprs; boundExprs.reserve(1 + origUbMap.getNumResults()); - auto dim = b.getAffineDimExpr(origUbMap.getNumInputs()); + auto dim = b.getAffineDimExpr(origUbMap.getNumDims()); // The new upper bound map is the original one with an additional // expression i + tileSize appended. boundExprs.push_back(dim + tileSizes[i]); boundExprs.append(origUbMap.getResults().begin(), origUbMap.getResults().end()); - auto ubMap = - b.getAffineMap(origUbMap.getNumInputs() + 1, 0, boundExprs, {}); + auto ubMap = b.getAffineMap(origUbMap.getNumDims() + 1, + origUbMap.getNumSymbols(), boundExprs, {}); newLoops[width + i].setUpperBound(/*operands=*/ubOperands, ubMap); } else { // No need of the min expression. diff --git a/mlir/test/Transforms/loop-tiling.mlir b/mlir/test/Transforms/loop-tiling.mlir index ff1fd30ce20..c1da1720e52 100644 --- a/mlir/test/Transforms/loop-tiling.mlir +++ b/mlir/test/Transforms/loop-tiling.mlir @@ -1,12 +1,11 @@ -// RUN: mlir-opt %s -loop-tile -tile-size=32 | FileCheck %s +// RUN: mlir-opt %s -split-input-file -loop-tile -tile-size=32 | FileCheck %s // RUN: mlir-opt %s -split-input-file -loop-tile -tile-cache-size=512 | FileCheck %s --check-prefix=MODEL +// ----- + // CHECK-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 32) // CHECK-DAG: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 32, 50) // CHECK-DAG: [[IDENTITY:#map[0-9]+]] = (d0) -> (d0) -// CHECK-DAG: [[LB:#map[0-9]+]] = ()[s0] -> (0, s0) -// CHECK-DAG: [[UB:#map[0-9]+]] = ()[s0, s1] -> (s0, 4096 floordiv s1) -// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1) // CHECK-LABEL: func @loop_tiling() // CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 { @@ -54,6 +53,13 @@ func @loop_tiling() { return } +// ----- + +// CHECK-DAG: [[IDENTITY:#map[0-9]+]] = (d0) -> (d0) +// CHECK-DAG: [[LB:#map[0-9]+]] = ()[s0] -> (0, s0) +// CHECK-DAG: [[UB:#map[0-9]+]] = ()[s0, s1] -> (s0, 4096 floordiv s1) +// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0)[s0, s1] -> (d0 + 32, s0, 4096 floordiv s1) + #lb = ()[s0] -> (0, s0) #ub = ()[s0, s1] -> (s0, 4096 floordiv s1) // CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) { @@ -64,7 +70,7 @@ func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) { } return // CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 { -// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) { +// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%i0)[%0, %arg2] { // CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1) // CHECK-NEXT: } // CHECK-NEXT: } @@ -95,3 +101,66 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector< // MODEL: affine.for %i0 = 0 to 256 step 4 { // MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 { // MODEL-NEXT: affine.for %i2 = 0 to 250 step 5 { + + +// ----- + +// CHECK-DAG: [[UBMAP:#map[0-9]+]] = (d0)[s0] -> (d0 + 32, s0) + +func @tile_with_symbolic_loop_upper_bounds(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) { + %cst = constant 0.000000e+00 : f32 + %0 = dim %arg0, 0 : memref<?x?xf32> + affine.for %i0 = 0 to %0 { + affine.for %i1 = 0 to %0 { + store %cst, %arg2[%i0, %i1] : memref<?x?xf32> + affine.for %i2 = 0 to %0 { + %1 = load %arg0[%i0, %i2] : memref<?x?xf32> + %2 = load %arg1[%i2, %i1] : memref<?x?xf32> + %3 = mulf %1, %2 : f32 + %4 = load %arg2[%i0, %i1] : memref<?x?xf32> + %5 = addf %4, %3 : f32 + store %5, %arg2[%i0, %i1] : memref<?x?xf32> + } + } + } + return +} + +// CHECK: %0 = dim %arg0, 0 : memref<?x?xf32> +// CHECK-NEXT: affine.for %i0 = 0 to %0 step 32 { +// CHECK-NEXT: affine.for %i1 = 0 to %0 step 32 { +// CHECK-NEXT: affine.for %i2 = #map2(%i0) to min [[UBMAP]](%i0)[%0] { +// CHECK-NEXT: affine.for %i3 = #map2(%i1) to min [[UBMAP]](%i1)[%0] { +// CHECK-NEXT: store %cst, %arg2[%i2, %i3] : memref<?x?xf32> +// CHECK-NEXT: affine.for %i4 = 0 to %0 { +// CHECK-NEXT: %1 = load %arg0[%i2, %i4] : memref<?x?xf32> +// CHECK-NEXT: %2 = load %arg1[%i4, %i3] : memref<?x?xf32> +// CHECK-NEXT: %3 = mulf %1, %2 : f32 +// CHECK-NEXT: %4 = load %arg2[%i2, %i3] : memref<?x?xf32> +// CHECK-NEXT: %5 = addf %4, %3 : f32 +// CHECK-NEXT: store %5, %arg2[%i2, %i3] : memref<?x?xf32> +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: return + +// ----- + +// CHECK-DAG: [[UBMAP:#map[0-9]+]] = (d0)[s0, s1] -> (d0 + 32, s0 + s1) + +func @tile_with_loop_upper_bounds_in_two_symbols(%arg0: memref<?xf32>, %limit: index) { + %dim0 = dim %arg0, 0 : memref<?xf32> + affine.for %i0 = 0 to ()[s0, s1] -> (s0 + s1) ()[%dim0, %limit] { + %v0 = load %arg0[%i0] : memref<?xf32> + } + return +} + +// CHECK: %0 = dim %arg0, 0 : memref<?xf32> +// CHECK-NEXT: affine.for %i0 = 0 to #map1()[%0, %arg1] step 32 { +// CHECK-NEXT: affine.for %i1 = #map2(%i0) to min [[UBMAP]](%i0)[%0, %arg1] { +// CHECK-NEXT: %1 = load %arg0[%i1] : memref<?xf32> +// CHECK-NEXT: } +// CHECK-NEXT: } |

