diff options
5 files changed, 266 insertions, 209 deletions
diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp index 8c8e1e88a08..12a83c08c7c 100644 --- a/mlir/lib/Transforms/MaterializeVectors.cpp +++ b/mlir/lib/Transforms/MaterializeVectors.cpp @@ -373,12 +373,16 @@ reindexAffineIndices(FuncBuilder *b, VectorType hwVectorType, auto stride = vectorShape[i - numMemRefIndices - numSuperVectorIndices]; affineExprs.push_back(d_i + offset * stride); } - auto affineMap = AffineMap::get(numIndices, 0, affineExprs, {}); - // TODO(ntv): support a concrete map and composition. - auto app = b->create<AffineApplyOp>(b->getInsertionPoint()->getLoc(), - affineMap, memrefIndices); - return SmallVector<mlir::Value *, 8>{app->getResults()}; + // Create a bunch of single result maps. + return functional::map( + [b, numIndices, memrefIndices](AffineExpr expr) { + auto map = AffineMap::get(numIndices, 0, expr, {}); + auto app = makeNormalizedAffineApply( + b, b->getInsertionPoint()->getLoc(), map, memrefIndices); + return app->getResult(0); + }, + affineExprs); } /// Returns attributes with the following substitutions applied: @@ -553,11 +557,17 @@ static bool instantiateMaterialization(Instruction *inst, // Create a builder here for unroll-and-jam effects. FuncBuilder b(inst); auto *opInst = cast<OperationInst>(inst); + // AffineApplyOp are ignored: instantiating the proper vector op will take + // care of AffineApplyOps by composing them properly. + if (opInst->isa<AffineApplyOp>()) { + return false; + } if (auto write = opInst->dyn_cast<VectorTransferWriteOp>()) { auto *clone = instantiate(&b, write, state->hwVectorType, state->hwVectorInstance, state->substitutionsMap); return clone == nullptr; - } else if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) { + } + if (auto read = opInst->dyn_cast<VectorTransferReadOp>()) { auto *clone = instantiate(&b, read, state->hwVectorType, state->hwVectorInstance, state->substitutionsMap); if (!clone) { @@ -570,10 +580,12 @@ static bool instantiateMaterialization(Instruction *inst, // The only op with 0 results reaching this point must, by construction, be // VectorTransferWriteOps and have been caught above. Ops with >= 2 results // are not yet supported. So just support 1 result. - if (opInst->getNumResults() != 1) + if (opInst->getNumResults() != 1) { return inst->emitError("NYI: ops with != 1 results"); - if (opInst->getResult(0)->getType() != state->superVectorType) + } + if (opInst->getResult(0)->getType() != state->superVectorType) { return inst->emitError("Op does not return a supervector."); + } auto *clone = instantiate(&b, opInst, state->hwVectorType, state->substitutionsMap); if (!clone) { diff --git a/mlir/test/Transforms/Vectorize/materialize.mlir b/mlir/test/Transforms/Vectorize/materialize.mlir index a0b808e2541..b1c56fe143c 100644 --- a/mlir/test/Transforms/Vectorize/materialize.mlir +++ b/mlir/test/Transforms/Vectorize/materialize.mlir @@ -1,26 +1,33 @@ // RUN: mlir-opt %s -materialize-vectors -vector-size=4 -vector-size=4 | FileCheck %s -// CHECK-DAG: #[[map_instance_0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1, d2, d3) -// CHECK-DAG: #[[map_instance_1:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 1, d2, d3) -// CHECK-DAG: #[[map_instance_2:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 2, d2, d3) -// CHECK-DAG: #[[map_instance_3:map[0-9]+]] = (d0, d1, d2, d3) -> (d0, d1 + 3, d2, d3) -// CHECK-DAG: #[[map_proj_d0d1d2d3d4_d1d0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0) +// CHECK-DAG: #[[D0D1D2D3TOD0:map[0-9]+]] = (d0, d1, d2, d3) -> (d0) +// CHECK-DAG: #[[D0D1D2D3TOD1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1) +// CHECK-DAG: #[[D0D1D2D3TOD2:map[0-9]+]] = (d0, d1, d2, d3) -> (d2) +// CHECK-DAG: #[[D0D1D2D3TOD3:map[0-9]+]] = (d0, d1, d2, d3) -> (d3) +// CHECK-DAG: #[[D0D1D2D3TOD1D0:map[0-9]+]] = (d0, d1, d2, d3) -> (d1, d0) +// CHECK-DAG: #[[D0D1D2D3TOD1P1:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 1) +// CHECK-DAG: #[[D0D1D2D3TOD1P2:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 2) +// CHECK-DAG: #[[D0D1D2D3TOD1P3:map[0-9]+]] = (d0, d1, d2, d3) -> (d1 + 3) +// CHECK-LABEL: func @materialize func @materialize(%M : index, %N : index, %O : index, %P : index) { %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32> // CHECK: for %i0 = 0 to %arg0 step 4 { - // CHECK: for %i1 = 0 to %arg1 step 4 { - // CHECK: for %i2 = 0 to %arg2 { - // CHECK: for %i3 = 0 to %arg3 step 4 { - // CHECK: %1 = affine_apply #[[map_instance_0]](%i0, %i1, %i2, %i3) - // CHECK: vector_transfer_write {{.*}}, %0, %1#0, %1#1, %1#2, %1#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index - // CHECK: %2 = affine_apply #[[map_instance_1]](%i0, %i1, %i2, %i3) - // CHECK: vector_transfer_write {{.*}}, %0, %2#0, %2#1, %2#2, %2#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index - // CHECK: %3 = affine_apply #[[map_instance_2]](%i0, %i1, %i2, %i3) - // CHECK: vector_transfer_write {{.*}}, %0, %3#0, %3#1, %3#2, %3#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index - // CHECK: %4 = affine_apply #[[map_instance_3]](%i0, %i1, %i2, %i3) - // CHECK: vector_transfer_write {{.*}}, %0, %4#0, %4#1, %4#2, %4#3 {permutation_map: #[[map_proj_d0d1d2d3d4_d1d0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index + // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 { + // CHECK-NEXT: for %i2 = 0 to %arg2 { + // CHECK-NEXT: for %i3 = 0 to %arg3 step 4 { + // CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[D0D1D2D3TOD0]](%i0, %i1, %i2, %i3) + // CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1]](%i0, %i1, %i2, %i3) + // CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[D0D1D2D3TOD2]](%i0, %i1, %i2, %i3) + // CHECK-NEXT: %[[d:[0-9]+]] = {{.*}}[[D0D1D2D3TOD3]](%i0, %i1, %i2, %i3) + // CHECK-NEXT: vector_transfer_write {{.*}}, %0, %[[a]], %[[b]], %[[c]], %[[d]] {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index + // CHECK: %[[b1:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P1]](%i0, %i1, %i2, %i3) + // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b1]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index + // CHECK: %[[b2:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P2]](%i0, %i1, %i2, %i3) + // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index + // CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0D1D2D3TOD1P3]](%i0, %i1, %i2, %i3) + // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index for %i0 = 0 to %M step 4 { for %i1 = 0 to %N step 4 { for %i2 = 0 to %O { diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir index 8837aa94744..c2f4009fa06 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir @@ -1,13 +1,13 @@ // RUN: mlir-opt %s -vectorize -virtual-vector-size 32 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s -// Capture permutation maps used in vectorization. -// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1) - // vector<32xf32> -> vector<8xf32> -// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1) -// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8) -// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0, d1 + 16) -// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0, d1 + 24) +// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0) +// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1) +// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8) +// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16) +// CHECK-DAG: [[D0D1TOD1P24:#.*]] = (d0, d1) -> (d1 + 24) + +// CHECK-LABEL: func @vector_add_2d func @vector_add_2d(%M : index, %N : index) -> f32 { %A = alloc (%M, %N) : memref<?x?xf32, 0> %B = alloc (%M, %N) : memref<?x?xf32, 0> @@ -16,19 +16,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f2 = constant 2.0 : f32 // 4x unroll (jammed by construction). // CHECK: for %i0 = 0 to %arg0 { - // CHECK: for %i1 = 0 to %arg1 step 32 { - // CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}} - // CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> - // CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}} - // CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> - // CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}} - // CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> - // CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}} - // CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> + // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 { + // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> // for %i0 = 0 to %M { for %i1 = 0 to %N { @@ -38,19 +42,23 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { } // 4x unroll (jammed by construction). // CHECK: for %i2 = 0 to %arg0 { - // CHECK: for %i3 = 0 to %arg1 step 32 { - // CHECK: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: [[VAL0:%.*]] = affine_apply [[MAP0]]{{.*}} - // CHECK: vector_transfer_write [[CST0]], {{.*}}, [[VAL0]]#0, [[VAL0]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> - // CHECK: [[VAL1:%.*]] = affine_apply [[MAP1]]{{.*}} - // CHECK: vector_transfer_write [[CST1]], {{.*}}, [[VAL1]]#0, [[VAL1]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> - // CHECK: [[VAL2:%.*]] = affine_apply [[MAP2]]{{.*}} - // CHECK: vector_transfer_write [[CST2]], {{.*}}, [[VAL2]]#0, [[VAL2]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> - // CHECK: [[VAL3:%.*]] = affine_apply [[MAP3]]{{.*}} - // CHECK: vector_transfer_write [[CST3]], {{.*}}, [[VAL3]]#0, [[VAL3]]#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32> + // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 { + // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[CST3:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST0]], {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST1]], {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1P16]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST2]], {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0]]{{.*}} + // CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P24]]{{.*}} + // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> // for %i2 = 0 to %M { for %i3 = 0 to %N { @@ -60,35 +68,47 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { } // 4x unroll (jammed by construction). // CHECK: for %i4 = 0 to %arg0 { - // CHECK: for %i5 = 0 to %arg1 step 32 { - // CHECK: %11 = affine_apply #map0(%i4, %i5) - // CHECK: %12 = vector_transfer_read %0, %11#0, %11#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %13 = affine_apply #map2(%i4, %i5) - // CHECK: %14 = vector_transfer_read %0, %13#0, %13#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %15 = affine_apply #map3(%i4, %i5) - // CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %17 = affine_apply #map4(%i4, %i5) - // CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %19 = affine_apply #map0(%i4, %i5) - // CHECK: %20 = vector_transfer_read %1, %19#0, %19#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %21 = affine_apply #map2(%i4, %i5) - // CHECK: %22 = vector_transfer_read %1, %21#0, %21#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %23 = affine_apply #map3(%i4, %i5) - // CHECK: %24 = vector_transfer_read %1, %23#0, %23#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %25 = affine_apply #map4(%i4, %i5) - // CHECK: %26 = vector_transfer_read %1, %25#0, %25#1 {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %27 = addf %12, %20 : vector<8xf32> - // CHECK: %28 = addf %14, %22 : vector<8xf32> - // CHECK: %29 = addf %16, %24 : vector<8xf32> - // CHECK: %30 = addf %18, %26 : vector<8xf32> - // CHECK: %31 = affine_apply #map0(%i4, %i5) - // CHECK: vector_transfer_write %27, %2, %31#0, %31#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %32 = affine_apply #map2(%i4, %i5) - // CHECK: vector_transfer_write %28, %2, %32#0, %32#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %33 = affine_apply #map3(%i4, %i5) - // CHECK: vector_transfer_write %29, %2, %33#0, %33#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %34 = affine_apply #map4(%i4, %i5) - // CHECK: vector_transfer_write %30, %2, %34#0, %34#1 {permutation_map: #[[map_proj_d0d1_d1]]} : vector<8xf32>, memref<?x?xf32>, index, index + // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 { + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write // for %i4 = 0 to %M { for %i5 = 0 to %N { diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir index 6503b46b73b..fdf7749b73e 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir @@ -1,15 +1,13 @@ // RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 16 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=8 | FileCheck %s -// Capture permutation maps used in vectorization. -// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1) - // vector<3x16xf32> -> vector<8xf32> -// CHECK-DAG: [[MAP0:#.*]] = (d0, d1) -> (d0, d1) -// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 8) -// CHECK-DAG: [[MAP2:#.*]] = (d0, d1) -> (d0 + 1, d1) -// CHECK-DAG: [[MAP3:#.*]] = (d0, d1) -> (d0 + 1, d1 + 8) -// CHECK-DAG: [[MAP4:#.*]] = (d0, d1) -> (d0 + 2, d1) -// CHECK-DAG: [[MAP5:#.*]] = (d0, d1) -> (d0 + 2, d1 + 8) +// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0) +// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1) +// CHECK-DAG: [[D0D1TOD1P8:#.*]] = (d0, d1) -> (d1 + 8) +// CHECK-DAG: [[D0D1TOD0P1:#.*]] = (d0, d1) -> (d0 + 1) +// CHECK-DAG: [[D0D1TOD0P2:#.*]] = (d0, d1) -> (d0 + 2) + +// CHECK-LABEL: func @vector_add_2d func @vector_add_2d(%M : index, %N : index) -> f32 { %A = alloc (%M, %N) : memref<?x?xf32, 0> %B = alloc (%M, %N) : memref<?x?xf32, 0> @@ -18,25 +16,31 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f2 = constant 2.0 : f32 // (3x2)x unroll (jammed by construction). // CHECK: for %i0 = 0 to %arg0 step 3 { - // CHECK: for %i1 = 0 to %arg1 step 16 { - // CHECK: %cst_1 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: %cst_2 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: %cst_3 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: %cst_4 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: %cst_5 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: %cst_6 = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> - // CHECK: %3 = affine_apply #map0(%i0, %i1) - // CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %4 = affine_apply #map2(%i0, %i1) - // CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %5 = affine_apply #map3(%i0, %i1) - // CHECK: vector_transfer_write %cst_3, %0, %5#0, %5#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %6 = affine_apply #map4(%i0, %i1) - // CHECK: vector_transfer_write %cst_4, %0, %6#0, %6#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %7 = affine_apply #map5(%i0, %i1) - // CHECK: vector_transfer_write %cst_5, %0, %7#0, %7#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %8 = affine_apply #map6(%i0, %i1) - // CHECK: vector_transfer_write %cst_6, %0, %8#0, %8#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index + // CHECK-NEXT: for %i1 = 0 to %arg1 step 16 { + // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> + // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1) + // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1) + // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL20:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1) + // CHECK-NEXT: [[VAL21:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL20]], [[VAL21]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL30:%.*]] = affine_apply [[D0D1TOD0P1]](%i0, %i1) + // CHECK-NEXT: [[VAL31:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL40:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1) + // CHECK-NEXT: [[VAL41:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL40]], [[VAL41]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> + // CHECK-NEXT: [[VAL50:%.*]] = affine_apply [[D0D1TOD0P2]](%i0, %i1) + // CHECK-NEXT: [[VAL51:%.*]] = affine_apply [[D0D1TOD1P8]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> for %i0 = 0 to %M { for %i1 = 0 to %N { // non-scoped %f1 @@ -45,25 +49,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { } // (3x2)x unroll (jammed by construction). // CHECK: for %i2 = 0 to %arg0 step 3 { - // CHECK: for %i3 = 0 to %arg1 step 16 { - // CHECK: %cst_7 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: %cst_8 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: %cst_9 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: %cst_10 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: %cst_11 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: %cst_12 = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> - // CHECK: %9 = affine_apply #map0(%i2, %i3) - // CHECK: vector_transfer_write %cst_7, %1, %9#0, %9#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %10 = affine_apply #map2(%i2, %i3) - // CHECK: vector_transfer_write %cst_8, %1, %10#0, %10#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %11 = affine_apply #map3(%i2, %i3) - // CHECK: vector_transfer_write %cst_9, %1, %11#0, %11#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %12 = affine_apply #map4(%i2, %i3) - // CHECK: vector_transfer_write %cst_10, %1, %12#0, %12#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %13 = affine_apply #map5(%i2, %i3) - // CHECK: vector_transfer_write %cst_11, %1, %13#0, %13#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %14 = affine_apply #map6(%i2, %i3) - // CHECK: vector_transfer_write %cst_12, %1, %14#0, %14#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index + // CHECK-NEXT: for %i3 = 0 to %arg1 step 16 { + // ..... for %i2 = 0 to %M { for %i3 = 0 to %N { // non-scoped %f2 @@ -73,49 +60,68 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { } // (3x2)x unroll (jammed by construction). // CHECK: for %i4 = 0 to %arg0 step 3 { - // CHECK: for %i5 = 0 to %arg1 step 16 { - // CHECK: %15 = affine_apply #map0(%i4, %i5) - // CHECK: %16 = vector_transfer_read %0, %15#0, %15#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %17 = affine_apply #map2(%i4, %i5) - // CHECK: %18 = vector_transfer_read %0, %17#0, %17#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %19 = affine_apply #map3(%i4, %i5) - // CHECK: %20 = vector_transfer_read %0, %19#0, %19#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %21 = affine_apply #map4(%i4, %i5) - // CHECK: %22 = vector_transfer_read %0, %21#0, %21#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %23 = affine_apply #map5(%i4, %i5) - // CHECK: %24 = vector_transfer_read %0, %23#0, %23#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %25 = affine_apply #map6(%i4, %i5) - // CHECK: %26 = vector_transfer_read %0, %25#0, %25#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %27 = affine_apply #map0(%i4, %i5) - // CHECK: %28 = vector_transfer_read %1, %27#0, %27#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %29 = affine_apply #map2(%i4, %i5) - // CHECK: %30 = vector_transfer_read %1, %29#0, %29#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %31 = affine_apply #map3(%i4, %i5) - // CHECK: %32 = vector_transfer_read %1, %31#0, %31#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %33 = affine_apply #map4(%i4, %i5) - // CHECK: %34 = vector_transfer_read %1, %33#0, %33#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %35 = affine_apply #map5(%i4, %i5) - // CHECK: %36 = vector_transfer_read %1, %35#0, %35#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %37 = affine_apply #map6(%i4, %i5) - // CHECK: %38 = vector_transfer_read %1, %37#0, %37#1 {permutation_map: #map1} : (memref<?x?xf32>, index, index) -> vector<8xf32> - // CHECK: %39 = addf %16, %28 : vector<8xf32> - // CHECK: %40 = addf %18, %30 : vector<8xf32> - // CHECK: %41 = addf %20, %32 : vector<8xf32> - // CHECK: %42 = addf %22, %34 : vector<8xf32> - // CHECK: %43 = addf %24, %36 : vector<8xf32> - // CHECK: %44 = addf %26, %38 : vector<8xf32> - // CHECK: %45 = affine_apply #map0(%i4, %i5) - // CHECK: vector_transfer_write %39, %2, %45#0, %45#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %46 = affine_apply #map2(%i4, %i5) - // CHECK: vector_transfer_write %40, %2, %46#0, %46#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %47 = affine_apply #map3(%i4, %i5) - // CHECK: vector_transfer_write %41, %2, %47#0, %47#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %48 = affine_apply #map4(%i4, %i5) - // CHECK: vector_transfer_write %42, %2, %48#0, %48#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %49 = affine_apply #map5(%i4, %i5) - // CHECK: vector_transfer_write %43, %2, %49#0, %49#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index - // CHECK: %50 = affine_apply #map6(%i4, %i5) - // CHECK: vector_transfer_write %44, %2, %50#0, %50#1 {permutation_map: #map1} : vector<8xf32>, memref<?x?xf32>, index, index + // CHECK-NEXT: for %i5 = 0 to %arg1 step 16 { + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<8xf32> + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // for %i4 = 0 to %M { for %i5 = 0 to %N { %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0> diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir index 6d348c2e2c3..58865df7a9b 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir @@ -1,10 +1,12 @@ // RUN: mlir-opt %s -vectorize -virtual-vector-size 3 -virtual-vector-size 32 --test-fastest-varying=1 --test-fastest-varying=0 -materialize-vectors -vector-size=3 -vector-size=16 | FileCheck %s -// Capture permutation maps used in vectorization. -// CHECK-DAG: #[[map_proj_d0d1_d0d1:map[0-9]+]] = (d0, d1) -> (d0, d1) - // vector<3x32xf32> -> vector<3x16xf32> -// CHECK-DAG: [[MAP1:#.*]] = (d0, d1) -> (d0, d1 + 16) +// CHECK-DAG: [[D0D1TOD0:#.*]] = (d0, d1) -> (d0) +// CHECK-DAG: [[D0D1TOD1:#.*]] = (d0, d1) -> (d1) +// CHECK-DAG: [[D0D1TOD0D1:#.*]] = (d0, d1) -> (d0, d1) +// CHECK-DAG: [[D0D1TOD1P16:#.*]] = (d0, d1) -> (d1 + 16) + +// CHECK-LABEL: func @vector_add_2d func @vector_add_2d(%M : index, %N : index) -> f32 { %A = alloc (%M, %N) : memref<?x?xf32, 0> %B = alloc (%M, %N) : memref<?x?xf32, 0> @@ -13,13 +15,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f2 = constant 2.0 : f32 // 2x unroll (jammed by construction). // CHECK: for %i0 = 0 to %arg0 step 3 { - // CHECK: for %i1 = 0 to %arg1 step 32 { - // CHECK: %cst_1 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32> - // CHECK: %cst_2 = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32> - // CHECK: %3 = affine_apply #map0(%i0, %i1) - // CHECK: vector_transfer_write %cst_1, %0, %3#0, %3#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index - // CHECK: %4 = affine_apply #map1(%i0, %i1) - // CHECK: vector_transfer_write %cst_2, %0, %4#0, %4#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index + // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 { + // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32> + // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32> + // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1) + // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32> + // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i0, %i1) + // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i0, %i1) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32> // for %i0 = 0 to %M { for %i1 = 0 to %N { @@ -29,13 +33,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { } // 2x unroll (jammed by construction). // CHECK: for %i2 = 0 to %arg0 step 3 { - // CHECK: for %i3 = 0 to %arg1 step 32 { - // CHECK: %cst_3 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32> - // CHECK: %cst_4 = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32> - // CHECK: %5 = affine_apply #map0(%i2, %i3) - // CHECK: vector_transfer_write %cst_3, %1, %5#0, %5#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index - // CHECK: %6 = affine_apply #map1(%i2, %i3) - // CHECK: vector_transfer_write %cst_4, %1, %6#0, %6#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index + // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 { + // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32> + // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32> + // CHECK-NEXT: [[VAL00:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3) + // CHECK-NEXT: [[VAL01:%.*]] = affine_apply [[D0D1TOD1]](%i2, %i3) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL00]], [[VAL01]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32> + // CHECK-NEXT: [[VAL10:%.*]] = affine_apply [[D0D1TOD0]](%i2, %i3) + // CHECK-NEXT: [[VAL11:%.*]] = affine_apply [[D0D1TOD1P16]](%i2, %i3) + // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[D0D1TOD0D1]]} : vector<3x16xf32> // for %i2 = 0 to %M { for %i3 = 0 to %N { @@ -45,21 +51,27 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { } // 2x unroll (jammed by construction). // CHECK: for %i4 = 0 to %arg0 step 3 { - // CHECK: for %i5 = 0 to %arg1 step 32 { - // CHECK: %7 = affine_apply #map0(%i4, %i5) - // CHECK: %8 = vector_transfer_read %0, %7#0, %7#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32> - // CHECK: %9 = affine_apply #map1(%i4, %i5) - // CHECK: %10 = vector_transfer_read %0, %9#0, %9#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32> - // CHECK: %11 = affine_apply #map0(%i4, %i5) - // CHECK: %12 = vector_transfer_read %1, %11#0, %11#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32> - // CHECK: %13 = affine_apply #map1(%i4, %i5) - // CHECK: %14 = vector_transfer_read %1, %13#0, %13#1 {permutation_map: #map0} : (memref<?x?xf32>, index, index) -> vector<3x16xf32> - // CHECK: %15 = addf %8, %12 : vector<3x16xf32> - // CHECK: %16 = addf %10, %14 : vector<3x16xf32> - // CHECK: %17 = affine_apply #map0(%i4, %i5) - // CHECK: vector_transfer_write %15, %2, %17#0, %17#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index - // CHECK: %18 = affine_apply #map1(%i4, %i5) - // CHECK: vector_transfer_write %16, %2, %18#0, %18#1 {permutation_map: #map0} : vector<3x16xf32>, memref<?x?xf32>, index, index + // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 { + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = vector_transfer_read + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32> + // CHECK-NEXT: {{.*}} = addf {{.*}} : vector<3x16xf32> + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: {{.*}} = affine_apply + // CHECK-NEXT: vector_transfer_write // for %i4 = 0 to %M { for %i5 = 0 to %N { |

