author     Nicolas Vasilache <ntv@google.com>  2018-12-31 09:42:05 -0800
committer  jpienaar <jpienaar@google.com>      2019-03-29 14:50:23 -0700
commit     73f5c9c380105bf99ff66740bc530121ba708648 (patch)
tree       f4cab1ab46d433bb69be80c9e0911c8cfaf0ab1c /mlir/test/Transforms/Vectorize
parent     a250643ec844f1672fe42fc6ddde9504e7e8130f (diff)
[MLIR] Sketch a simple set of EDSCs to declaratively write MLIR
This CL introduces a simple set of Embedded Domain-Specific Components (EDSCs) in MLIR:

1. a `Type` system of shell classes that closely matches the MLIR type system. These types are subdivided into `Bindable` leaf expressions and non-bindable `Expr` expressions;
2. an `MLIREmitter` class whose purpose is to:
   a. maintain a map of `Bindable` leaf expressions to concrete SSAValue*;
   b. provide helper functionality to specify bindings of `Bindable` classes to SSAValue* while verifying conformable types;
   c. traverse the `Expr` and emit the MLIR.

This is used on a concrete example to implement MemRef load/store with clipping in the LowerVectorTransfer pass. More specifically, the following pseudo-C++ code:

```c++
MLFuncBuilder *b = ...;
Location location = ...;
Bindable zero, one, expr, size;
// EDSL expression
auto access = select(expr < zero, zero, select(expr < size, expr, size - one));
auto ssaValue = MLIREmitter(b)
    .bind(zero, ...)
    .bind(one, ...)
    .bind(expr, ...)
    .bind(size, ...)
    .emit(location, access);
```

is used to emit all the MLIR for a clipped MemRef access.

This simple EDSL can easily be extended to more powerful patterns and should serve as the counterpart to pattern matchers (and could potentially be unified once we get enough experience). In the future, most of this code should be TableGen'd, but for now it has a concrete, valuable use: it makes MLIR programmable in a declarative fashion.

This CL also adds Stmt and proper supporting free functions, and rewrites VectorTransferLowering fully using EDSCs. The code for creating the EDSCs that emit a VectorTransferReadOp as loops with clipped loads is:

```c++
Stmt block = Block({
  tmpAlloc = alloc(tmpMemRefType),
  vectorView = vector_type_cast(tmpAlloc, vectorMemRefType),
  ForNest(ivs, lbs, ubs, steps, {
    scalarValue = load(scalarMemRef, accessInfo.clippedScalarAccessExprs),
    store(scalarValue, tmpAlloc, accessInfo.tmpAccessExprs),
  }),
  vectorValue = load(vectorView, zero),
  tmpDealloc = dealloc(tmpAlloc.getLHS())});
emitter.emitStmt(block);
```

where `accessInfo.clippedScalarAccessExprs` is created with:

```c++
select(i + ii < zero, zero, select(i + ii < N, i + ii, N - one));
```

The generated MLIR resembles:

```mlir
%1 = dim %0, 0 : memref<?x?x?x?xf32>
%2 = dim %0, 1 : memref<?x?x?x?xf32>
%3 = dim %0, 2 : memref<?x?x?x?xf32>
%4 = dim %0, 3 : memref<?x?x?x?xf32>
%5 = alloc() : memref<5x4x3xf32>
%6 = vector_type_cast %5 : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
for %i4 = 0 to 3 {
  for %i5 = 0 to 4 {
    for %i6 = 0 to 5 {
      %7 = affine_apply #map0(%i0, %i4)
      %8 = cmpi "slt", %7, %c0 : index
      %9 = affine_apply #map0(%i0, %i4)
      %10 = cmpi "slt", %9, %1 : index
      %11 = affine_apply #map0(%i0, %i4)
      %12 = affine_apply #map1(%1, %c1)
      %13 = select %10, %11, %12 : index
      %14 = select %8, %c0, %13 : index
      %15 = affine_apply #map0(%i3, %i6)
      %16 = cmpi "slt", %15, %c0 : index
      %17 = affine_apply #map0(%i3, %i6)
      %18 = cmpi "slt", %17, %4 : index
      %19 = affine_apply #map0(%i3, %i6)
      %20 = affine_apply #map1(%4, %c1)
      %21 = select %18, %19, %20 : index
      %22 = select %16, %c0, %21 : index
      %23 = load %0[%14, %i1, %i2, %22] : memref<?x?x?x?xf32>
      store %23, %5[%i6, %i5, %i4] : memref<5x4x3xf32>
    }
  }
}
%24 = load %6[%c0] : memref<1xvector<5x4x3xf32>>
dealloc %5 : memref<5x4x3xf32>
```

In particular, notice that only 3 out of the 4 dimensions of the access are clipped: this indeed corresponds to the number of dimensions in the super-vector.

This CL also addresses the cleanups resulting from the review of the previous CL and performs some refactoring to simplify the abstraction.
PiperOrigin-RevId: 227367414
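For reference, the nested `select` above is simply a clamp of the access index into the range `[0, size - 1]`; each cmpi/select pair in the generated MLIR implements one level of that clamp. The following is a minimal standalone C++ sketch of that scalar logic, assuming a hypothetical helper name `clipIndex` (this is an illustration only, not the EDSC API):

```c++
#include <cassert>
#include <cstdint>

// Scalar equivalent of:
//   select(expr < zero, zero, select(expr < size, expr, size - one))
// i.e. clamp the index into [0, size - 1].
int64_t clipIndex(int64_t expr, int64_t size) {
  assert(size > 0 && "memref dimension must be positive");
  int64_t inBoundsOrLast = (expr < size) ? expr : size - 1;  // inner select
  return (expr < 0) ? 0 : inBoundsOrLast;                    // outer select
}
```

For example, `clipIndex(-2, 5) == 0`, `clipIndex(3, 5) == 3`, and `clipIndex(7, 5) == 4`.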
Diffstat (limited to 'mlir/test/Transforms/Vectorize')
-rw-r--r--  mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir  143
1 file changed, 104 insertions, 39 deletions
diff --git a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir
index b19b9ce7e9a..223f5ebff5e 100644
--- a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir
+++ b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir
@@ -1,26 +1,55 @@
// RUN: mlir-opt %s -lower-vector-transfers | FileCheck %s
// CHECK: #[[ADD:map[0-9]+]] = (d0, d1) -> (d0 + d1)
-mlfunc @materialize_read(%M : index, %N : index, %O : index, %P : index) {
+// CHECK: #[[SUB:map[0-9]+]] = (d0, d1) -> (d0 - d1)
+// CHECK-LABEL: mlfunc @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
+mlfunc @materialize_read(%M: index, %N: index, %O: index, %P: index) {
+ // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
+ // CHECK-NEXT: for %i0 = 0 to %arg0 step 3 {
+ // CHECK-NEXT: for %i1 = 0 to %arg1 {
+ // CHECK-NEXT: for %i2 = 0 to %arg2 {
+ // CHECK-NEXT: for %i3 = 0 to %arg3 step 5 {
+ // CHECK-NEXT: %c0 = constant 0 : index
+ // CHECK-NEXT: %c1 = constant 1 : index
+ // CHECK: %1 = dim %0, 0 : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %2 = dim %0, 1 : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %3 = dim %0, 2 : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %4 = dim %0, 3 : memref<?x?x?x?xf32>
+ // CHECK: %5 = alloc() : memref<5x4x3xf32>
+ // CHECK-NEXT: %6 = vector_type_cast %5 : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
+ // CHECK-NEXT: for %i4 = 0 to 3 {
+ // CHECK-NEXT: for %i5 = 0 to 4 {
+ // CHECK-NEXT: for %i6 = 0 to 5 {
+ // CHECK-NEXT: %7 = affine_apply #[[ADD]](%i0, %i4)
+ // CHECK-NEXT: %8 = cmpi "slt", %7, %c0 : index
+ // CHECK-NEXT: %9 = affine_apply #[[ADD]](%i0, %i4)
+ // CHECK-NEXT: %10 = cmpi "slt", %9, %1 : index
+ // CHECK-NEXT: %11 = affine_apply #[[ADD]](%i0, %i4)
+ // CHECK-NEXT: %12 = affine_apply #[[SUB]](%1, %c1)
+ // CHECK-NEXT: %13 = select %10, %11, %12 : index
+ // CHECK-NEXT: %14 = select %8, %c0, %13 : index
+ // CHECK-NEXT: %15 = affine_apply #[[ADD]](%i3, %i6)
+ // CHECK-NEXT: %16 = cmpi "slt", %15, %c0 : index
+ // CHECK-NEXT: %17 = affine_apply #[[ADD]](%i3, %i6)
+ // CHECK-NEXT: %18 = cmpi "slt", %17, %4 : index
+ // CHECK-NEXT: %19 = affine_apply #[[ADD]](%i3, %i6)
+ // CHECK-NEXT: %20 = affine_apply #[[SUB]](%4, %c1)
+ // CHECK-NEXT: %21 = select %18, %19, %20 : index
+ // CHECK-NEXT: %22 = select %16, %c0, %21 : index
+ // CHECK-NEXT: %23 = load %0[%14, %i1, %i2, %22] : memref<?x?x?x?xf32>
+ // CHECK-NEXT: store %23, %5[%i6, %i5, %i4] : memref<5x4x3xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: %24 = load %6[%c0] : memref<1xvector<5x4x3xf32>>
+ // CHECK-NEXT: dealloc %5 : memref<5x4x3xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: return
+ // CHECK-NEXT:}
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
- // CHECK: for %i0 = 0 to %arg0 step 3 {
- // CHECK-NEXT: for %i1 = 0 to %arg1 {
- // CHECK-NEXT: for %i2 = 0 to %arg2 {
- // CHECK-NEXT: for %i3 = 0 to %arg3 step 5 {
- // CHECK-NEXT: %1 = alloc() : memref<5x4x3xf32>
- // CHECK-NEXT: %2 = vector_type_cast %1 : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
- // CHECK-NEXT: for %i4 = 0 to 5 {
- // CHECK-NEXT: %3 = affine_apply #[[ADD]](%i3, %i4)
- // CHECK-NEXT: for %i5 = 0 to 4 {
- // CHECK-NEXT: for %i6 = 0 to 3 {
- // CHECK-NEXT: %4 = affine_apply #[[ADD]](%i0, %i6)
- // CHECK-NEXT: %5 = load %0[%4, %i1, %i2, %3] : memref<?x?x?x?xf32>
- // CHECK-NEXT: store %5, %1[%i4, %i5, %i6] : memref<5x4x3xf32>
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: %6 = load %2[%c0] : memref<1xvector<5x4x3xf32>>
- // CHECK-NEXT: dealloc %1 : memref<5x4x3xf32>
for %i0 = 0 to %M step 3 {
for %i1 = 0 to %N {
for %i2 = 0 to %O {
@@ -33,28 +62,64 @@ mlfunc @materialize_read(%M : index, %N : index, %O : index, %P : index) {
return
}
-mlfunc @materialize_write(%M : index, %N : index, %O : index, %P : index) {
+// CHECK-LABEL:mlfunc @materialize_write(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
+mlfunc @materialize_write(%M: index, %N: index, %O: index, %P: index) {
+ // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %cst = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
+ // CHECK-NEXT: for %i0 = 0 to %arg0 step 3 {
+ // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
+ // CHECK-NEXT: for %i2 = 0 to %arg2 {
+ // CHECK-NEXT: for %i3 = 0 to %arg3 step 5 {
+ // CHECK-NEXT: %c0 = constant 0 : index
+ // CHECK-NEXT: %c1 = constant 1 : index
+ // CHECK: %1 = dim %0, 0 : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %2 = dim %0, 1 : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %3 = dim %0, 2 : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %4 = dim %0, 3 : memref<?x?x?x?xf32>
+ // CHECK: %5 = alloc() : memref<5x4x3xf32>
+ // CHECK-NEXT: %6 = vector_type_cast %5 : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
+ // CHECK-NEXT: store %cst, %6[%c0] : memref<1xvector<5x4x3xf32>>
+ // CHECK-NEXT: for %i4 = 0 to 3 {
+ // CHECK-NEXT: for %i5 = 0 to 4 {
+ // CHECK-NEXT: for %i6 = 0 to 5 {
+ // CHECK-NEXT: %7 = load %5[%i6, %i5, %i4] : memref<5x4x3xf32>
+ // CHECK-NEXT: %8 = affine_apply #[[ADD]](%i0, %i4)
+ // CHECK-NEXT: %9 = cmpi "slt", %8, %c0 : index
+ // CHECK-NEXT: %10 = affine_apply #[[ADD]](%i0, %i4)
+ // CHECK-NEXT: %11 = cmpi "slt", %10, %1 : index
+ // CHECK-NEXT: %12 = affine_apply #[[ADD]](%i0, %i4)
+ // CHECK-NEXT: %13 = affine_apply #[[SUB]](%1, %c1)
+ // CHECK-NEXT: %14 = select %11, %12, %13 : index
+ // CHECK-NEXT: %15 = select %9, %c0, %14 : index
+ // CHECK-NEXT: %16 = affine_apply #[[ADD]](%i1, %i5)
+ // CHECK-NEXT: %17 = cmpi "slt", %16, %c0 : index
+ // CHECK-NEXT: %18 = affine_apply #[[ADD]](%i1, %i5)
+ // CHECK-NEXT: %19 = cmpi "slt", %18, %2 : index
+ // CHECK-NEXT: %20 = affine_apply #[[ADD]](%i1, %i5)
+ // CHECK-NEXT: %21 = affine_apply #[[SUB]](%2, %c1)
+ // CHECK-NEXT: %22 = select %19, %20, %21 : index
+ // CHECK-NEXT: %23 = select %17, %c0, %22 : index
+ // CHECK-NEXT: %24 = affine_apply #[[ADD]](%i3, %i6)
+ // CHECK-NEXT: %25 = cmpi "slt", %24, %c0 : index
+ // CHECK-NEXT: %26 = affine_apply #[[ADD]](%i3, %i6)
+ // CHECK-NEXT: %27 = cmpi "slt", %26, %4 : index
+ // CHECK-NEXT: %28 = affine_apply #[[ADD]](%i3, %i6)
+ // CHECK-NEXT: %29 = affine_apply #[[SUB]](%4, %c1)
+ // CHECK-NEXT: %30 = select %27, %28, %29 : index
+ // CHECK-NEXT: %31 = select %25, %c0, %30 : index
+ // CHECK-NEXT: store %7, %0[%15, %23, %i2, %31] : memref<?x?x?x?xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: dealloc %5 : memref<5x4x3xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: }
+ // CHECK-NEXT: return
+ // CHECK-NEXT:}
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
%f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
- // CHECK: for %i0 = 0 to %arg0 step 3 {
- // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
- // CHECK-NEXT: for %i2 = 0 to %arg2 {
- // CHECK-NEXT: for %i3 = 0 to %arg3 step 5 {
- // CHECK-NEXT: %1 = alloc() : memref<5x4x3xf32>
- // CHECK-NEXT: %2 = vector_type_cast %1 : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
- // CHECK-NEXT: store %cst, %2[%c0] : memref<1xvector<5x4x3xf32>>
- // CHECK-NEXT: for %i4 = 0 to 5 {
- // CHECK-NEXT: %3 = affine_apply #[[ADD]](%i3, %i4)
- // CHECK-NEXT: for %i5 = 0 to 4 {
- // CHECK-NEXT: %4 = affine_apply #[[ADD]](%i1, %i5)
- // CHECK-NEXT: for %i6 = 0 to 3 {
- // CHECK-NEXT: %5 = affine_apply #[[ADD]](%i0, %i6)
- // CHECK-NEXT: %6 = load %1[%i4, %i5, %i6] : memref<5x4x3xf32>
- // CHECK-NEXT: store %6, %0[%5, %4, %i2, %3] : memref<?x?x?x?xf32>
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: dealloc %1 : memref<5x4x3xf32>
for %i0 = 0 to %M step 3 {
for %i1 = 0 to %N step 4 {
for %i2 = 0 to %O {