summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- mlir/lib/Transforms/MaterializeVectors.cpp 28
-rw-r--r-- mlir/test/Transforms/Vectorize/materialize.mlir 39
-rw-r--r-- mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir 144
-rw-r--r-- mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir 186
-rw-r--r-- mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir 78
5 files changed, 266 insertions, 209 deletions
diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp
index 8c8e1e88a08..12a83c08c7c 100644
--- a/mlir/lib/Transforms/MaterializeVectors.cpp
+++ b/mlir/lib/Transforms/MaterializeVectors.cpp
@@ -373,12 +373,16 @@ reindexAffineIndices(FuncBuilder *b, VectorType hwVectorType,
auto stride = vectorShape[i - numMemRefIndices - numSuperVectorIndices];
affineExprs.push_back(d_i + offset * stride);
}
- auto affineMap = AffineMap::get(numIndices, 0, affineExprs, {});
- // TODO(ntv): support a concrete map and composition.
- auto app = b->create<AffineApplyOp>(b->getInsertionPoint()->getLoc(),
- affineMap, memrefIndices);
- return SmallVector<mlir::Value *, 8>{app->getResults()};
+ // Create one single-result map per affine expression.
+ return functional::map(
+ [b, numIndices, memrefIndices](AffineExpr expr) {
+ auto map = AffineMap::get(numIndices, 0, expr, {});
+ auto app = makeNormalizedAffineApply(
+ b, b->getInsertionPoint()->getLoc(), map, memrefIndices);
+ return app->getResult(0);
+ },
+ affineExprs);
}
/// Returns attributes with the following substitutions applied:
@@ -553,11 +557,17 @@ static bool instantiateMaterialization(Instruction *inst,
// Create a builder here for unroll-and-jam effects.
FuncBuilder b(inst);
auto *opInst = cast<OperationInst>(inst);
+ // AffineApplyOps are ignored: instantiating the proper vector op will take
+ // care of AffineApplyOps by composing them properly.
+ if (opInst->isa<AffineApplyOp>()) {
+ return false;
+ }
if (auto write = opInst->dyn_cast<VectorTransferWriteOp>()) {
auto *clone = instantiate(&b, write, state->hwVectorType,
state->hwVectorInstance, state->substitutionsMap);
return clone == nullptr;
- } else if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) {
+ }
+ if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) {
auto *clone = instantiate(&b, read, state->hwVectorType,
state->hwVectorInstance, state->substitutionsMap);
if (!clone) {
@@ -570,10 +580,12 @@ static bool instantiateMaterialization(Instruction *inst,
// The only op with 0 results reaching this point must, by construction, be
// VectorTransferWriteOps and have been caught above. Ops with >= 2 results
// are not yet supported. So just support 1 result.
- if (opInst->getNumResults() != 1)
+ if (opInst->getNumResults() != 1) {
return inst->emitError("NYI: ops with != 1 results");
- if (opInst->getResult(0)->getType() != state->superVectorType)
+ }
+ if (opInst->getResult(0)->getType() != state->superVectorType) {
return inst->emitError("Op does not return a supervector.");
+ }
auto *clone =
instantiate(&b, opInst, state->hwVectorType, state->substitutionsMap);
if (!clone) {
diff --git a/mlir/test/Transforms/Vectorize/materialize.mlir b/mlir/test/Transforms/Vectorize/materialize.mlir
index a0b808e2541..b1c56fe143c 100644
--- a/mlir/test/Transforms/Vectorize/materialize.mlir
+++ b/mlir/test/Transforms/Vectorize/materialize.mlir
@@ -1,26 +1,33 @@
// RUN: mlir-opt %s -materialize-vectors -vector-size=4 -vector-size=4 | FileCheck %s
-// CHECK-DAG: #[[map_instance_0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1, d2, d3)
-// CHECK-DAG: #[[map_instance_1:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 1, d2, d3)
-// CHECK-DAG: #[[map_instance_2:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 2, d2, d3)
-// CHECK-DAG: #[[map_instance_3:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 3, d2, d3)
-// CHECK-DAG: #[[map_proj_d0d1d2d3d4_d1d0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0)
+// CHECK-DAG: #[[D0D1D2D3TOD0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0)
+// CHECK-DAG: #[[D0D1D2D3TOD1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1)
+// CHECK-DAG: #[[D0D1D2D3TOD2:map[0-9]+]] = (d0, d1, d2, d3) -> (d2)
+// CHECK-DAG: #[[D0D1D2D3TOD3:map[0-9]+]] = (d0, d1, d2, d3) -> (d3)
+// CHECK-DAG: #[[D0D1D2D3TOD1D0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0)
+// CHECK-DAG: #[[D0D1D2D3TOD1P1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 1)
+// CHECK-DAG: #[[D0D1D2D3TOD1P2:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 2)
+// CHECK-DAG: #[[D0D1D2D3TOD1P3:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 3)
+// CHECK-LABEL: func @materialize
func @materialize(%M : index, %N : index, %O : index, %P : index) {
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
%f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
// CHECK: for %i0 = 0 to %arg0 step 4 {
- // CHECK: for %i1 = 0 to %arg1 step 4 {
- // CHECK: for %i2 = 0 to %arg2 {
- // CHECK: for %i3 = 0 to %arg3 step 4 {
- // CHECK: %1 = affine_apply #[[map_instance_0]](%i0, %i1, %i2, %i3)
- // CHECK: vector_transfer_write {{.*}}, %0, %1#0, %1#1, %1#2, %1#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
- // CHECK: %2 = affine_apply #[[map_instance_1]](%i0, %i1, %i2, %i3)
- // CHECK: vector_transfer_write {{.*}}, %0, %2#0, %2#1, %2#2, %2#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
- // CHECK: %3 = affine_apply #[[map_instance_2]](%i0, %i1, %i2, %i3)
- // CHECK: vector_transfer_write {{.*}}, %0, %3#0, %3#1, %3#2, %3#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
- // CHECK: %4 = affine_apply #[[map_instance_3]](%i0, %i1, %i2, %i3)
- // CHECK: vector_transfer_write {{.*}}, %0, %4#0, %4#1, %4#2, %4#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
+ // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
+ // CHECK-NEXT: for %i2 = 0 to %arg2 {
+ // CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
+ // CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[D0D1D2D3TOD0]](%i0, %i1, %i2, %i3)
+ // CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1]](%i0, %i1, %i2, %i3)
+ // CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[D0D1D2D3TOD2]](%i0, %i1, %i2, %i3)
+ // CHECK-NEXT: %[[d:[0-9]+]] = {{.*}}[[D0D1D2D3TOD3]](%i0, %i1, %i2, %i3)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, %0, %[[a]], %[[b]], %[[c]], %[[d]] {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
+ // CHECK: %[[b1:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P1]](%i0, %i1, %i2, %i3)
+ // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b1]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
+ // CHECK: %[[b2:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P2]](%i0, %i1, %i2, %i3)
+ // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
+ // CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P3]](%i0, %i1, %i2, %i3)
+ // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
for %i0 = 0 to %M step 4 {
for %i1 = 0 to %N step 4 {
for %i2 = 0 to %O {
diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir
index 8837aa94744..c2f4009fa06 100644
--- a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir
+++ b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir
@@ -1,13 +1,13 @@
// RUN: mlir-opt %s -vectorize -virtual-vector-size 32 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s
-// Capture permutation maps used in vectorization.
-// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1)
-
// vector<32xf32> -> vector<8xf32>
-// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1)
-// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8)
-// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0, d1 + 16)
-// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0, d1 + 24)
+// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
+// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
+// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8)
+// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16)
+// CHECK-DAG: [[D0D1TOD1P24:#.*]] = (d0, d1) -> (d1 + 24)
+
+// CHECK-LABEL: func @vector_add_2d
func @vector_add_2d(%M : index, %N : index) -> f32 {
%A = alloc (%M, %N) : memref<?x?xf32, 0>
%B = alloc (%M, %N) : memref<?x?xf32, 0>
@@ -16,19 +16,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f2 = constant 2.0 : f32
// 4x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 {
- // CHECK: for %i1 = 0 to %arg1 step 32 {
- // CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}}
- // CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
- // CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}}
- // CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
- // CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}}
- // CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
- // CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}}
- // CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
+ // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
+ // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
@@ -38,19 +42,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// 4x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 {
- // CHECK: for %i3 = 0 to %arg1 step 32 {
- // CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}}
- // CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
- // CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}}
- // CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
- // CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}}
- // CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
- // CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}}
- // CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>
+ // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
+ // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}}
+ // CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}}
+ // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
@@ -60,35 +68,47 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// 4x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 {
- // CHECK: for %i5 = 0 to %arg1 step 32 {
- // CHECK: %11 = affine_apply #map0(%i4, %i5)
- // CHECK: %12 = vector_transfer_read %0, %11#0, %11#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %13 = affine_apply #map2(%i4, %i5)
- // CHECK: %14 = vector_transfer_read %0, %13#0, %13#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %15 = affine_apply #map3(%i4, %i5)
- // CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %17 = affine_apply #map4(%i4, %i5)
- // CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %19 = affine_apply #map0(%i4, %i5)
- // CHECK: %20 = vector_transfer_read %1, %19#0, %19#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %21 = affine_apply #map2(%i4, %i5)
- // CHECK: %22 = vector_transfer_read %1, %21#0, %21#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %23 = affine_apply #map3(%i4, %i5)
- // CHECK: %24 = vector_transfer_read %1, %23#0, %23#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %25 = affine_apply #map4(%i4, %i5)
- // CHECK: %26 = vector_transfer_read %1, %25#0, %25#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %27 = addf %12, %20 : vector<8xf32>
- // CHECK: %28 = addf %14, %22 : vector<8xf32>
- // CHECK: %29 = addf %16, %24 : vector<8xf32>
- // CHECK: %30 = addf %18, %26 : vector<8xf32>
- // CHECK: %31 = affine_apply #map0(%i4, %i5)
- // CHECK: vector_transfer_write %27, %2, %31#0, %31#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %32 = affine_apply #map2(%i4, %i5)
- // CHECK: vector_transfer_write %28, %2, %32#0, %32#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %33 = affine_apply #map3(%i4, %i5)
- // CHECK: vector_transfer_write %29, %2, %33#0, %33#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %34 = affine_apply #map4(%i4, %i5)
- // CHECK: vector_transfer_write %30, %2, %34#0, %34#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index
+ // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir
index 6503b46b73b..fdf7749b73e 100644
--- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir
+++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir
@@ -1,15 +1,13 @@
// RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 16 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s
-// Capture permutation maps used in vectorization.
-// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1)
-
// vector<3x16xf32> -> vector<8xf32>
-// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1)
-// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8)
-// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0 + 1, d1)
-// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0 + 1, d1 + 8)
-// CHECK-DAG: [[MAP4:#.*]] = (d0, d1) -> (d0 + 2, d1)
-// CHECK-DAG: [[MAP5:#.*]] = (d0, d1) -> (d0 + 2, d1 + 8)
+// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
+// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
+// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8)
+// CHECK-DAG: [[D0D1TOD0P1:#.*]] = (d0, d1) -> (d0 + 1)
+// CHECK-DAG: [[D0D1TOD0P2:#.*]] = (d0, d1) -> (d0 + 2)
+
+// CHECK-LABEL: func @vector_add_2d
func @vector_add_2d(%M : index, %N : index) -> f32 {
%A = alloc (%M, %N) : memref<?x?xf32, 0>
%B = alloc (%M, %N) : memref<?x?xf32, 0>
@@ -18,25 +16,31 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f2 = constant 2.0 : f32
// (3x2)x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
- // CHECK: for %i1 = 0 to %arg1 step 16 {
- // CHECK: %cst_1 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: %cst_2 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: %cst_3 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: %cst_4 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: %cst_5 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: %cst_6 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
- // CHECK: %3 = affine_apply #map0(%i0, %i1)
- // CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %4 = affine_apply #map2(%i0, %i1)
- // CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %5 = affine_apply #map3(%i0, %i1)
- // CHECK: vector_transfer_write %cst_3, %0, %5#0, %5#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %6 = affine_apply #map4(%i0, %i1)
- // CHECK: vector_transfer_write %cst_4, %0, %6#0, %6#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %7 = affine_apply #map5(%i0, %i1)
- // CHECK: vector_transfer_write %cst_5, %0, %7#0, %7#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %8 = affine_apply #map6(%i0, %i1)
- // CHECK: vector_transfer_write %cst_6, %0, %8#0, %8#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
+ // CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
+ // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
+ // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
+ // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
+ // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1)
+ // CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1)
+ // CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL40:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1)
+ // CHECK-NEXT: [[VAL41:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL40]], [[VAL41]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
+ // CHECK-NEXT: [[VAL50:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1)
+ // CHECK-NEXT: [[VAL51:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
// non-scoped %f1
@@ -45,25 +49,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
- // CHECK: for %i3 = 0 to %arg1 step 16 {
- // CHECK: %cst_7 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: %cst_8 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: %cst_9 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: %cst_10 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: %cst_11 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: %cst_12 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
- // CHECK: %9 = affine_apply #map0(%i2, %i3)
- // CHECK: vector_transfer_write %cst_7, %1, %9#0, %9#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %10 = affine_apply #map2(%i2, %i3)
- // CHECK: vector_transfer_write %cst_8, %1, %10#0, %10#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %11 = affine_apply #map3(%i2, %i3)
- // CHECK: vector_transfer_write %cst_9, %1, %11#0, %11#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %12 = affine_apply #map4(%i2, %i3)
- // CHECK: vector_transfer_write %cst_10, %1, %12#0, %12#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %13 = affine_apply #map5(%i2, %i3)
- // CHECK: vector_transfer_write %cst_11, %1, %13#0, %13#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %14 = affine_apply #map6(%i2, %i3)
- // CHECK: vector_transfer_write %cst_12, %1, %14#0, %14#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
+ // CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
+ // .....
for %i2 = 0 to %M {
for %i3 = 0 to %N {
// non-scoped %f2
@@ -73,49 +60,68 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
- // CHECK: for %i5 = 0 to %arg1 step 16 {
- // CHECK: %15 = affine_apply #map0(%i4, %i5)
- // CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %17 = affine_apply #map2(%i4, %i5)
- // CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %19 = affine_apply #map3(%i4, %i5)
- // CHECK: %20 = vector_transfer_read %0, %19#0, %19#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %21 = affine_apply #map4(%i4, %i5)
- // CHECK: %22 = vector_transfer_read %0, %21#0, %21#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %23 = affine_apply #map5(%i4, %i5)
- // CHECK: %24 = vector_transfer_read %0, %23#0, %23#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %25 = affine_apply #map6(%i4, %i5)
- // CHECK: %26 = vector_transfer_read %0, %25#0, %25#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %27 = affine_apply #map0(%i4, %i5)
- // CHECK: %28 = vector_transfer_read %1, %27#0, %27#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %29 = affine_apply #map2(%i4, %i5)
- // CHECK: %30 = vector_transfer_read %1, %29#0, %29#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %31 = affine_apply #map3(%i4, %i5)
- // CHECK: %32 = vector_transfer_read %1, %31#0, %31#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %33 = affine_apply #map4(%i4, %i5)
- // CHECK: %34 = vector_transfer_read %1, %33#0, %33#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %35 = affine_apply #map5(%i4, %i5)
- // CHECK: %36 = vector_transfer_read %1, %35#0, %35#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %37 = affine_apply #map6(%i4, %i5)
- // CHECK: %38 = vector_transfer_read %1, %37#0, %37#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32>
- // CHECK: %39 = addf %16, %28 : vector<8xf32>
- // CHECK: %40 = addf %18, %30 : vector<8xf32>
- // CHECK: %41 = addf %20, %32 : vector<8xf32>
- // CHECK: %42 = addf %22, %34 : vector<8xf32>
- // CHECK: %43 = addf %24, %36 : vector<8xf32>
- // CHECK: %44 = addf %26, %38 : vector<8xf32>
- // CHECK: %45 = affine_apply #map0(%i4, %i5)
- // CHECK: vector_transfer_write %39, %2, %45#0, %45#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %46 = affine_apply #map2(%i4, %i5)
- // CHECK: vector_transfer_write %40, %2, %46#0, %46#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %47 = affine_apply #map3(%i4, %i5)
- // CHECK: vector_transfer_write %41, %2, %47#0, %47#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %48 = affine_apply #map4(%i4, %i5)
- // CHECK: vector_transfer_write %42, %2, %48#0, %48#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %49 = affine_apply #map5(%i4, %i5)
- // CHECK: vector_transfer_write %43, %2, %49#0, %49#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
- // CHECK: %50 = affine_apply #map6(%i4, %i5)
- // CHECK: vector_transfer_write %44, %2, %50#0, %50#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index
+ // CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32>
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ //
for %i4 = 0 to %M {
for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir
index 6d348c2e2c3..58865df7a9b 100644
--- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir
+++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir
@@ -1,10 +1,12 @@
// RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 32 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=3 -vector-size=16 | FileCheck %s
-// Capture permutation maps used in vectorization.
-// CHECK-DAG: #[[map_proj_d0d1_d0d1:map[0-9]+]] = (d0, d1) -> (d0, d1)
-
// vector<3x32xf32> -> vector<3x16xf32>
-// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 16)
+// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0)
+// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1)
+// CHECK-DAG: [[D0D1TOD0D1:#.*]] = (d0, d1) -> (d0, d1)
+// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16)
+
+// CHECK-LABEL: func @vector_add_2d
func @vector_add_2d(%M : index, %N : index) -> f32 {
%A = alloc (%M, %N) : memref<?x?xf32, 0>
%B = alloc (%M, %N) : memref<?x?xf32, 0>
@@ -13,13 +15,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f2 = constant 2.0 : f32
// 2x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
- // CHECK: for %i1 = 0 to %arg1 step 32 {
- // CHECK: %cst_1 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
- // CHECK: %cst_2 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
- // CHECK: %3 = affine_apply #map0(%i0, %i1)
- // CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
- // CHECK: %4 = affine_apply #map1(%i0, %i1)
- // CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
+ // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
+ // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
+ // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
+ // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
+ // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
+ // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1)
+ // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i0, %i1)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
@@ -29,13 +33,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// 2x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
- // CHECK: for %i3 = 0 to %arg1 step 32 {
- // CHECK: %cst_3 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
- // CHECK: %cst_4 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
- // CHECK: %5 = affine_apply #map0(%i2, %i3)
- // CHECK: vector_transfer_write %cst_3, %1, %5#0, %5#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
- // CHECK: %6 = affine_apply #map1(%i2, %i3)
- // CHECK: vector_transfer_write %cst_4, %1, %6#0, %6#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
+ // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
+ // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
+ // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
+ // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3)
+ // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i2, %i3)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
+ // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3)
+ // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i2, %i3)
+ // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
@@ -45,21 +51,27 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
}
// 2x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
- // CHECK: for %i5 = 0 to %arg1 step 32 {
- // CHECK: %7 = affine_apply #map0(%i4, %i5)
- // CHECK: %8 = vector_transfer_read %0, %7#0, %7#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
- // CHECK: %9 = affine_apply #map1(%i4, %i5)
- // CHECK: %10 = vector_transfer_read %0, %9#0, %9#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
- // CHECK: %11 = affine_apply #map0(%i4, %i5)
- // CHECK: %12 = vector_transfer_read %1, %11#0, %11#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
- // CHECK: %13 = affine_apply #map1(%i4, %i5)
- // CHECK: %14 = vector_transfer_read %1, %13#0, %13#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32>
- // CHECK: %15 = addf %8, %12 : vector<3x16xf32>
- // CHECK: %16 = addf %10, %14 : vector<3x16xf32>
- // CHECK: %17 = affine_apply #map0(%i4, %i5)
- // CHECK: vector_transfer_write %15, %2, %17#0, %17#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
- // CHECK: %18 = affine_apply #map1(%i4, %i5)
- // CHECK: vector_transfer_write %16, %2, %18#0, %18#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index
+ // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = vector_transfer_read
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32>
+ // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32>
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: {{.*}} = affine_apply
+ // CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
OpenPOWER on IntegriCloud