21 files changed, 597 insertions(+), 908 deletions(-)
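The user-visible syntax changes in this patch: the padding value of `vector.transfer_read` becomes a required trailing operand (previously optional and parenthesized), and `vector.type_cast` now produces a 0-D `memref<vector<...>>` printed with `to`. A minimal sketch of IR accepted after this change (all values `%A`, `%B`, `%i`, `%j` are hypothetical):

```mlir
%f0 = constant 0.0 : f32
%v  = vector.transfer_read %A[%i, %j], %f0
        {permutation_map = (d0, d1) -> (d1)} : memref<?x?xf32>, vector<128xf32>
vector.transfer_write %v, %A[%i, %j]
        {permutation_map = (d0, d1) -> (d1)} : vector<128xf32>, memref<?x?xf32>
%VB = vector.type_cast %B : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
```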
diff --git a/mlir/g3doc/Dialects/Vector.md b/mlir/g3doc/Dialects/Vector.md
index 4607c32fe74..04f5ba71cdb 100644
--- a/mlir/g3doc/Dialects/Vector.md
+++ b/mlir/g3doc/Dialects/Vector.md
@@ -6,175 +6,9 @@ This dialect provides mid-level abstraction for the MLIR super-vectorizer.
 
 ## Operations
 
-### Vector transfers
+To see the documentation for each operation, run:
 
-#### `vector.transfer_read` operation
-
-Syntax:
-
-``` {.ebnf}
-operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list `{` attribute-entry `} :` function-type
-```
-
-Examples:
-
-```mlir {.mlir}
-// Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> and
-// pad with %f0 to handle the boundary case:
-%f0 = constant 0.0f : f32
-for %i0 = 0 to %0 {
-  affine.for %i1 = 0 to %1 step 256 {
-    affine.for %i2 = 0 to %2 step 32 {
-      %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
-           {permutation_map: (d0, d1, d2) -> (d2, d1)} :
-           memref<?x?x?xf32>, vector<32x256xf32>
-}}}
-
-// Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
-// vector<128xf32>. The underlying implementation will require a 1-D vector
-// broadcast:
-for %i0 = 0 to %0 {
-  affine.for %i1 = 0 to %1 {
-    %3 = vector.transfer_read %A[%i0, %i1]
-         {permutation_map: (d0, d1) -> (0)} :
-         memref<?x?xf32>, vector<128xf32>
-  }
-}
-```
-
-The `vector.transfer_read` performs a blocking read from a slice within a scalar
-[MemRef](../LangRef.md#memref-type) supplied as its first operand into a
-[vector](../LangRef.md#vector-type) of the same elemental type. The slice is
-further defined by a full-rank index within the MemRef, supplied as the operands
-`2 .. 1 + rank(memref)`. The permutation_map [attribute](../LangRef.md#attributes)
-is an [affine-map](Affine.md#affine-maps) which specifies the transposition on
-the slice to match the vector shape. The size of the slice is specified by the
-size of the vector, given as the return type. Optionally, an `ssa-value` of the
-same elemental type as the MemRef is provided as the last operand to specify
-padding in the case of out-of-bounds accesses. Absence of the optional padding
-value signifies the `vector.transfer_read` is statically guaranteed to remain
-within the MemRef bounds. This operation is called 'read' by opposition to
-'load' because the super-vector granularity is generally not representable with
-a single hardware register. A `vector.transfer_read` is thus a mid-level
-abstraction that supports super-vectorization with non-effecting padding for
-full-tile-only code.
-
-More precisely, let's dive deeper into the permutation_map for the following:
-
-```mlir {.mlir}
-vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
-  { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } :
-  memref<?x?x?x?xf32>, vector<3x4x5xf32>
-```
-
-This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
-%expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice is:
-`%A[%expr1 : %expr1 + 5, %expr2, %expr3 : %expr3 + 3, %expr4]`
-
-That slice needs to be read into a `vector<3x4x5xf32>`. Since the permutation
-map is not full rank, there must be a broadcast along vector dimension `1`.
- -A notional lowering of vector.transfer_read could generate code resembling: - -```mlir {.mlir} -// %expr1, %expr2, %expr3, %expr4 defined before this point -%tmp = alloc() : vector<3x4x5xf32> -%view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> -for %i = 0 to 3 { - affine.for %j = 0 to 4 { - affine.for %k = 0 to 5 { - %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> - store %tmp[%i, %j, %k] : vector<3x4x5xf32> -}}} -%c0 = constant 0 : index -%vec = load %view_in_tmp[%c0] : vector<3x4x5xf32> -``` - -On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that the -temporary storage footprint is `3 * 5` values but `3 * 4 * 5` values are -actually transferred between `%A` and `%tmp`. - -Alternatively, if a notional vector broadcast operation were available, the -lowered code would resemble: - -```mlir {.mlir} -// %expr1, %expr2, %expr3, %expr4 defined before this point -%tmp = alloc() : vector<3x4x5xf32> -%view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> -for %i = 0 to 3 { - affine.for %k = 0 to 5 { - %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> - store %tmp[%i, 0, %k] : vector<3x4x5xf32> -}} -%c0 = constant 0 : index -%tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32> -%vec = broadcast %tmpvec, 1 : vector<3x4x5xf32> +```sh +mlir-tblgen --gen-op-doc -I /path/to/mlir/include \ +/path/to/mlir/include/mlir/Dialect/VectorOps/VectorOps.td ``` - -where `broadcast` broadcasts from element 0 to all others along the specified -dimension. This time, the temporary storage footprint is `3 * 5` values which is -the same amount of data as the `3 * 5` values transferred. An additional `1` -broadcast is required. On a GPU this broadcast could be implemented using a -warp-shuffle if loop `j` were mapped to `threadIdx.x`. - -#### `vector.transfer_write` operation - -Syntax: - -``` {.ebnf} -operation ::= `vector.transfer_write` ssa-use-list `{` attribute-entry `} :` vector-type ', ' memref-type ', ' index-type-list -``` - -Examples: - -```mlir {.mlir} -// write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`: -for %i0 = 0 to %0 { - affine.for %i1 = 0 to %1 step 32 { - affine.for %i2 = 0 to %2 step 64 { - affine.for %i3 = 0 to %3 step 16 { - %val = `ssa-value` : vector<16x32x64xf32> - vector.transfer_write %val, %A[%i0, %i1, %i2, %i3] - {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : - vector<16x32x64xf32>, memref<?x?x?x?xf32> -}}}} -``` - -The `vector.transfer_write` performs a blocking write from a -[vector](../LangRef.md#vector-type), supplied as its first operand, into a slice -within a scalar [MemRef](../LangRef.md#memref-type) of the same elemental type, -supplied as its second operand. The slice is further defined by a full-rank -index within the MemRef, supplied as the operands `3 .. 2 + rank(memref)`. The -permutation_map [attribute](../LangRef.md#attributes) is an -[affine-map](Affine.md#affine-maps) which specifies the transposition on the -slice to match the vector shape. The size of the slice is specified by the size -of the vector. This operation is called 'write' by opposition to 'store' because -the super-vector granularity is generally not representable with a single -hardware register. A `vector.transfer_write` is thus a mid-level abstraction -that supports super-vectorization with non-effecting padding for full-tile-only -code. It is the responsibility of `vector.transfer_write`'s implementation to -ensure the memory writes are valid. 
Different lowerings may be pertinent -depending on the hardware support. - -### Vector views - -#### `vector.type_cast` operation - -Syntax: - -``` {.ebnf} -operation ::= `vector.type_cast` ssa-use : memref-type, memref-type -``` - -Examples: - -```mlir - %A = alloc() : memref<5x4x3xf32> - %VA = vector.type_cast %A : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>> -``` - -The `vector.type_cast` operation performs a conversion from a memref with scalar -element to memref with a *single* vector element, copying the shape of the -memref to the vector. This is the minimal viable operation that is required to -make super-vectorization operational. It can be seen as a special case of the -`view` operation but scoped in the super-vectorization context. diff --git a/mlir/include/mlir/Dialect/VectorOps/VectorOps.h b/mlir/include/mlir/Dialect/VectorOps/VectorOps.h index 1d29567a1d6..668eaa5c9d5 100644 --- a/mlir/include/mlir/Dialect/VectorOps/VectorOps.h +++ b/mlir/include/mlir/Dialect/VectorOps/VectorOps.h @@ -15,8 +15,7 @@ // limitations under the License. // ============================================================================= // -// This file defines convenience types for working with super-vectorization -// operations, in particular super-vector loads and stores. +// This file defines the Vector dialect. // //===----------------------------------------------------------------------===// @@ -31,178 +30,13 @@ namespace mlir { namespace vector { -/// Dialect for super-vectorization Ops. +/// Dialect for Ops on higher-dimensional vector types. class VectorOpsDialect : public Dialect { public: VectorOpsDialect(MLIRContext *context); static StringRef getDialectNamespace() { return "vector"; } }; -/// VectorTransferReadOp performs a blocking read from a scalar memref -/// location into a super-vector of the same elemental type. This operation is -/// called 'read' by opposition to 'load' because the super-vector granularity -/// is generally not representable with a single hardware register. As a -/// consequence, memory transfers will generally be required when lowering -/// VectorTransferReadOp. A VectorTransferReadOp is thus a mid-level abstraction -/// that supports super-vectorization with non-effecting padding for full-tile -/// only code. -// -/// A vector transfer read has semantics similar to a vector load, with -/// additional support for: -/// 1. an optional value of the elemental type of the MemRef. This value -/// supports non-effecting padding and is inserted in places where the -/// vector read exceeds the MemRef bounds. If the value is not specified, -/// the access is statically guaranteed to be within bounds; -/// 2. an attribute of type AffineMap to specify a slice of the original -/// MemRef access and its transposition into the super-vector shape. -/// The permutation_map is an AffineMap that must represent a permutation -/// from the MemRef dim space projected onto the vector dim space. -/// This permutation_map has as many output dimensions as the vector rank. -/// However, it is not necessarily full rank on the target space to signify -/// that broadcast operations will be needed along certain vector -/// dimensions. -/// In the limit, one may load a 0-D slice of a memref (i.e. a single -/// value) into a vector, which corresponds to broadcasting that value in -/// the whole vector (i.e. a non-constant splat). -/// -/// Example with full rank permutation_map: -/// ```mlir -/// %A = alloc(%size1, %size2, %size3, %size4) : memref<?x?x?x?xf32> -/// ... 
-/// %val = `ssa-value` : f32 -/// // let %i, %j, %k, %l be ssa-values of type index -/// %v0 = vector.transfer_read %src[%i, %j, %k, %l] -/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : -/// memref<?x?x?x?xf32>, vector<16x32x64xf32> -/// %v1 = vector.transfer_read %src[%i, %j, %k, %l], (%val) -/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : -/// memref<?x?x?x?xf32>, vector<16x32x64xf32> -/// ``` -/// -/// Example with partial rank permutation_map: -/// ```mlir -/// %c0 = constant 0 : index -/// %A = alloc(%size1, %size2, %size3, %size4) : memref<?x?x?x?xf32> -/// ... -/// // let %i, %j be ssa-values of type index -/// %v0 = vector.transfer_read %src[%i, %c0, %c0, %c0] -/// {permutation_map: (d0, d1, d2, d3) -> (0, d1, 0)} : -/// memref<?x?x?x?xf32>, vector<16x32x64xf32> -class VectorTransferReadOp - : public Op<VectorTransferReadOp, OpTrait::VariadicOperands, - OpTrait::OneResult> { - enum Offsets : unsigned { MemRefOffset = 0, FirstIndexOffset = 1 }; - -public: - using Op::Op; - - static StringRef getOperationName() { return "vector.transfer_read"; } - static StringRef getPermutationMapAttrName() { return "permutation_map"; } - static void build(Builder *builder, OperationState &result, - VectorType vectorType, Value *srcMemRef, - ArrayRef<Value *> srcIndices, AffineMap permutationMap, - Optional<Value *> paddingValue = None); - VectorType getResultType() { - return getResult()->getType().cast<VectorType>(); - } - Value *getVector() { return getResult(); } - Value *getMemRef() { return getOperand(Offsets::MemRefOffset); } - VectorType getVectorType() { return getResultType(); } - MemRefType getMemRefType() { - return getMemRef()->getType().cast<MemRefType>(); - } - operand_range getIndices(); - Optional<Value *> getPaddingValue(); - AffineMap getPermutationMap(); - - static ParseResult parse(OpAsmParser &parser, OperationState &result); - void print(OpAsmPrinter &p); - LogicalResult verify(); -}; - -/// VectorTransferWriteOp performs a blocking write from a super-vector to -/// a scalar memref of the same elemental type. This operation is -/// called 'write' by opposition to 'store' because the super-vector granularity -/// is generally not representable with a single hardware register. As a -/// consequence, memory transfers will generally be required when lowering -/// VectorTransferWriteOp. A VectorTransferWriteOp is thus a mid-level -/// abstraction that supports super-vectorization with non-effecting padding for -/// full-tile only code. -/// -/// A vector transfer write has semantics similar to a vector store, with -/// additional support for handling out-of-bounds situations. It is the -/// responsibility of vector.transfer_write's implementation to ensure the -/// memory writes are valid. Different implementations may be pertinent -/// depending on the hardware support including: -/// 1. predication; -/// 2. explicit control-flow; -/// 3. Read-Modify-Write; -/// 4. writing out of bounds of the memref when the allocation allows it. -/// -/// Example: -/// ```mlir -/// %A = alloc(%size1, %size2, %size3, %size4) : memref<?x?x?x?xf32>. 
-/// %val = `ssa-value` : vector<16x32x64xf32> -/// // let %i, %j, %k, %l be ssa-values of type index -/// vector.transfer_write %val, %src[%i, %j, %k, %l] -/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : -/// vector<16x32x64xf32>, memref<?x?x?x?xf32> -/// ``` -class VectorTransferWriteOp - : public Op<VectorTransferWriteOp, OpTrait::VariadicOperands, - OpTrait::ZeroResult> { - enum Offsets : unsigned { - VectorOffset = 0, - MemRefOffset = 1, - FirstIndexOffset = 2 - }; - -public: - using Op::Op; - - static StringRef getOperationName() { return "vector.transfer_write"; } - static StringRef getPermutationMapAttrName() { return "permutation_map"; } - static void build(Builder *builder, OperationState &result, Value *srcVector, - Value *dstMemRef, ArrayRef<Value *> dstIndices, - AffineMap permutationMap); - Value *getVector() { return getOperand(Offsets::VectorOffset); } - VectorType getVectorType() { - return getVector()->getType().cast<VectorType>(); - } - Value *getMemRef() { return getOperand(Offsets::MemRefOffset); } - MemRefType getMemRefType() { - return getMemRef()->getType().cast<MemRefType>(); - } - operand_range getIndices(); - AffineMap getPermutationMap(); - - static ParseResult parse(OpAsmParser &parser, OperationState &result); - void print(OpAsmPrinter &p); - LogicalResult verify(); -}; - -/// VectorTypeCastOp performs a conversion from a memref with scalar element to -/// memref with vector element, copying the shape of the memref to the vector. -/// -/// Example: -/// -/// ```mlir -/// %A = alloc() : memref<5x4x3xf32> -/// %VA = vector.type_cast %A : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>> -/// ``` -class VectorTypeCastOp - : public Op<VectorTypeCastOp, OpTrait::OneOperand, OpTrait::OneResult> { -public: - using Op::Op; - - static StringRef getOperationName() { return "vector.type_cast"; } - static void build(Builder *builder, OperationState &result, Value *srcVector, - Type dstType); - static ParseResult parse(OpAsmParser &parser, OperationState &result); - void print(OpAsmPrinter &p); - LogicalResult verify(); -}; - #define GET_OP_CLASSES #include "mlir/Dialect/VectorOps/VectorOps.h.inc" diff --git a/mlir/include/mlir/Dialect/VectorOps/VectorOps.td b/mlir/include/mlir/Dialect/VectorOps/VectorOps.td index 032312e72cf..125ecac57d8 100644 --- a/mlir/include/mlir/Dialect/VectorOps/VectorOps.td +++ b/mlir/include/mlir/Dialect/VectorOps/VectorOps.td @@ -26,6 +26,10 @@ include "mlir/IR/OpBase.td" #endif // OP_BASE +#ifndef AFFINE_OPS_BASE +include "mlir/Dialect/AffineOps/AffineOpsBase.td" +#endif // AFFINE_OPS_BASE + def Vector_Dialect : Dialect { let name = "vector"; let cppNamespace = "vector"; @@ -68,6 +72,7 @@ def ExtractElementOp : } }]; } + def OuterProductOp : Vector_Op<"outerproduct", [NoSideEffect, SameOperandsAndResultElementType]>, Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, Variadic<AnyVector>:$acc)>, @@ -106,4 +111,239 @@ def OuterProductOp : } }]; } + +def VectorTransferReadOp : + Vector_Op<"transfer_read">, + Arguments<(ins AnyMemRef:$memref, Variadic<Index>:$indices, + AffineMapAttr:$permutation_map, AnyType:$padding)>, + Results<(outs AnyVector:$vector)> { + + let summary = "Reads a supervector from memory into an SSA vector value."; + + let description = [{ + The `vector.transfer_read` op performs a blocking read from a slice within + a scalar [MemRef](../LangRef.md#memref-type) supplied as its first operand + into a [vector](../LangRef.md#vector-type) of the same elemental type. 
The
+    slice is further defined by a full-rank index within the MemRef, supplied as
+    the operands `2 .. 1 + rank(memref)`. The permutation_map
+    [attribute](../LangRef.md#attributes) is an
+    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
+    slice to match the vector shape. The size of the slice is specified by the
+    size of the vector, given as the return type. An `ssa-value` of the same
+    elemental type as the MemRef is provided as the last operand to specify
+    padding in the case of out-of-bounds accesses. This operation is called
+    'read' as opposed to 'load' because the super-vector granularity is
+    generally not representable with a single hardware register.
+    A `vector.transfer_read` is thus a mid-level
+    abstraction that supports super-vectorization with non-effecting padding for
+    full-tile-only code.
+
+    More precisely, let's dive deeper into the permutation_map for the following
+    MLIR:
+
+    ```mlir {.mlir}
+    vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
+      { permutation_map = (d0,d1,d2,d3) -> (d2,0,d0) } :
+      memref<?x?x?x?xf32>, vector<3x4x5xf32>
+    ```
+
+    This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
+    %expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice
+    is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3 : %expr3 + 3, %expr4]`
+
+    That slice needs to be read into a `vector<3x4x5xf32>`. Since the
+    permutation map is not full rank, there must be a broadcast along vector
+    dimension `1`.
+
+    A notional lowering of vector.transfer_read could generate code resembling:
+
+    ```mlir
+    // %expr1, %expr2, %expr3, %expr4 defined before this point
+    %tmp = alloc() : vector<3x4x5xf32>
+    %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
+    for %i = 0 to 3 {
+      affine.for %j = 0 to 4 {
+        affine.for %k = 0 to 5 {
+          %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
+            memref<?x?x?x?xf32>
+          store %a, %tmp[%i, %j, %k] : vector<3x4x5xf32>
+    }}}
+    %c0 = constant 0 : index
+    %vec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
+    ```
+
+    On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that
+    the temporary storage footprint is `3 * 5` values but `3 * 4 * 5` values are
+    actually transferred between `%A` and `%tmp`.
+
+    Alternatively, if a notional vector broadcast operation were available, the
+    lowered code would resemble:
+
+    ```mlir
+    // %expr1, %expr2, %expr3, %expr4 defined before this point
+    %tmp = alloc() : vector<3x4x5xf32>
+    %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
+    for %i = 0 to 3 {
+      affine.for %k = 0 to 5 {
+        %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
+          memref<?x?x?x?xf32>
+        store %a, %tmp[%i, 0, %k] : vector<3x4x5xf32>
+    }}
+    %c0 = constant 0 : index
+    %tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
+    %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32>
+    ```
+
+    where `broadcast` broadcasts from element 0 to all others along the
+    specified dimension. This time, the temporary storage footprint is `3 * 5`
+    values, which is the same amount of data as the `3 * 5` values transferred.
+    One additional broadcast is required. On a GPU this broadcast could be
+    implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`.
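+
+    The padding operand supplies the value observed by any element of the
+    slice that falls outside the MemRef bounds. A hedged, per-element sketch
+    of that semantics for a 1-D read (the comparison and clamping below are
+    illustrative only, not the actual lowering):
+
+    ```mlir
+    // Element %l of a vector<128xf32> read at base %i of %A : memref<?xf32>,
+    // with padding value %f0:
+    %c0 = constant 0 : index
+    %dim = dim %A, 0 : memref<?xf32>
+    %idx = affine.apply (d0, d1) -> (d0 + d1) (%i, %l)
+    %inb = cmpi "slt", %idx, %dim : index
+    %safe = select %inb, %idx, %c0 : index  // clamp so the load stays in bounds
+    %ld  = load %A[%safe] : memref<?xf32>
+    %elt = select %inb, %ld, %f0 : f32      // out-of-bounds elements get %f0
+    ```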
+
+    Syntax:
+
+    ``` {.ebnf}
+    operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
+                    `{` attribute-entry `} :` memref-type `,` vector-type
+    ```
+
+    Examples:
+
+    ```mlir
+    // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
+    // and pad with %f0 to handle the boundary case:
+    %f0 = constant 0.0f : f32
+    for %i0 = 0 to %0 {
+      affine.for %i1 = 0 to %1 step 256 {
+        affine.for %i2 = 0 to %2 step 32 {
+          %v = vector.transfer_read %A[%i0, %i1, %i2], %f0
+            {permutation_map = (d0, d1, d2) -> (d2, d1)} :
+            memref<?x?x?xf32>, vector<32x256xf32>
+    }}}
+
+    // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
+    // vector<128xf32>. The underlying implementation will require a 1-D vector
+    // broadcast:
+    for %i0 = 0 to %0 {
+      affine.for %i1 = 0 to %1 {
+        %3 = vector.transfer_read %A[%i0, %i1], %f0
+          {permutation_map = (d0, d1) -> (0)} :
+          memref<?x?xf32>, vector<128xf32>
+      }
+    }
+    ```
+  }];
+
+  let extraClassDeclaration = [{
+    MemRefType getMemRefType() {
+      return memref()->getType().cast<MemRefType>();
+    }
+    VectorType getVectorType() {
+      return vector()->getType().cast<VectorType>();
+    }
+  }];
+}
+
+def VectorTransferWriteOp :
+  Vector_Op<"transfer_write">,
+  Arguments<(ins AnyVector:$vector, AnyMemRef:$memref,
+             Variadic<Index>:$indices,
+             AffineMapAttr:$permutation_map)> {
+
+  let summary = "The vector.transfer_write op writes a supervector to memory.";
+
+  let description = [{
+    The `vector.transfer_write` op performs a blocking write from a
+    [vector](../LangRef.md#vector-type), supplied as its first operand, into a
+    slice within a scalar [MemRef](../LangRef.md#memref-type) of the same
+    elemental type, supplied as its second operand. The slice is further defined
+    by a full-rank index within the MemRef, supplied as the operands
+    `3 .. 2 + rank(memref)`.
+    The permutation_map [attribute](../LangRef.md#attributes) is an
+    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
+    slice to match the vector shape. The size of the slice is specified by the
+    size of the vector. This operation is called 'write' as opposed to 'store'
+    because the super-vector granularity is generally not representable with a
+    single hardware register. A `vector.transfer_write` is thus a mid-level
+    abstraction that supports super-vectorization with non-effecting padding
+    for full-tile-only code. It is the responsibility of
+    `vector.transfer_write`'s implementation to ensure the memory writes are
+    valid. Different lowerings may be pertinent depending on the hardware
+    support: predication, explicit control flow, read-modify-write, or writing
+    out of bounds of the memref when the allocation allows it.
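+
+    A notional lowering mirroring the alloc-store-copy-dealloc scheme used by
+    LowerVectorTransfers.cpp in this patch could resemble the following sketch
+    (clipping of the destination indices is elided, and `%i + %ii` is notional
+    index arithmetic):
+
+    ```mlir
+    // %val : vector<3x5xf32>, %A : memref<?x?xf32>, %i, %j : index
+    %tmp = alloc() : memref<3x5xf32>
+    %view = vector.type_cast %tmp : memref<3x5xf32> to memref<vector<3x5xf32>>
+    store %val, %view[] : memref<vector<3x5xf32>>
+    for %ii = 0 to 3 {
+      affine.for %jj = 0 to 5 {
+        %a = load %tmp[%ii, %jj] : memref<3x5xf32>
+        store %a, %A[%i + %ii, %j + %jj] : memref<?x?xf32>
+    }}
+    dealloc %tmp : memref<3x5xf32>
+    ```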
+
+    Syntax:
+
+    ``` {.ebnf}
+    operation ::= `vector.transfer_write` ssa-use-list `{` attribute-entry `} :`
+                    vector-type `,` memref-type
+    ```
+
+    Examples:
+
+    ```mlir
+    // Write vector<16x32x64xf32> into the slice
+    //   `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
+    for %i0 = 0 to %0 {
+      affine.for %i1 = 0 to %1 step 32 {
+        affine.for %i2 = 0 to %2 step 64 {
+          affine.for %i3 = 0 to %3 step 16 {
+            %val = `ssa-value` : vector<16x32x64xf32>
+            vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
+              {permutation_map = (d0, d1, d2, d3) -> (d3, d1, d2)} :
+              vector<16x32x64xf32>, memref<?x?x?x?xf32>
+    }}}}
+    ```
+  }];
+
+  let extraClassDeclaration = [{
+    VectorType getVectorType() {
+      return vector()->getType().cast<VectorType>();
+    }
+    MemRefType getMemRefType() {
+      return memref()->getType().cast<MemRefType>();
+    }
+  }];
+}
+
+def VectorTypeCastOp :
+  Vector_Op<"type_cast", [NoSideEffect]>,
+  Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
+  Results<(outs AnyMemRef)> {
+  let summary = "type_cast op converts a scalar memref to a vector memref";
+  let description = [{
+    Performs a conversion from a memref with a scalar element type to a memref
+    with a *single* vector element, copying the shape of the memref to the
+    vector. This is the minimal viable operation that is required to make
+    super-vectorization operational. It can be seen as a special case of the
+    `view` operation but scoped in the super-vectorization context.
+
+    Syntax:
+
+    ``` {.ebnf}
+    operation ::= `vector.type_cast` ssa-use : memref-type to memref-type
+    ```
+
+    Example:
+
+    ```mlir
+    %A  = alloc() : memref<5x4x3xf32>
+    %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
+    ```
+  }];
+
+  let builders = [OpBuilder<
+    "Builder *builder, OperationState &result, Value *source">];
+
+  let parser = [{
+    return impl::parseCastOp(parser, result);
+  }];
+
+  let extraClassDeclaration = [{
+    MemRefType getMemRefType() {
+      return memref()->getType().cast<MemRefType>();
+    }
+    MemRefType getResultMemRefType() {
+      return getResult()->getType().cast<MemRefType>();
+    }
+  }];
+}
 #endif // VECTOR_OPS
diff --git a/mlir/lib/Analysis/VectorAnalysis.cpp b/mlir/lib/Analysis/VectorAnalysis.cpp
index e765ce35e74..2dab3481e56 100644
--- a/mlir/lib/Analysis/VectorAnalysis.cpp
+++ b/mlir/lib/Analysis/VectorAnalysis.cpp
@@ -195,7 +195,7 @@ bool mlir::matcher::operatesOnSuperVectorsOf(Operation &op,
   (void)mustDivide;
   VectorType superVectorType;
   if (auto read = dyn_cast<vector::VectorTransferReadOp>(op)) {
-    superVectorType = read.getResultType();
+    superVectorType = read.getVectorType();
     mustDivide = true;
   } else if (auto write = dyn_cast<vector::VectorTransferWriteOp>(op)) {
     superVectorType = write.getVectorType();
diff --git a/mlir/lib/Conversion/VectorToLLVM/VectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/VectorToLLVM.cpp
index 5ccf740f2fb..21bcdc9a6db 100644
--- a/mlir/lib/Conversion/VectorToLLVM/VectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/VectorToLLVM.cpp
@@ -196,10 +196,10 @@ public:
     int64_t offset;
     SmallVector<int64_t, 4> strides;
     auto successStrides =
-        getStridesAndOffset(targetMemRefType, strides, offset);
+        getStridesAndOffset(sourceMemRefType, strides, offset);
     bool isContiguous = (strides.back() == 1);
     if (isContiguous) {
-      auto sizes = targetMemRefType.getShape();
+      auto sizes = sourceMemRefType.getShape();
       for (int index = 0, e = strides.size() - 2; index < e; ++index) {
         if (strides[index] != strides[index + 1] * sizes[index + 1]) {
           isContiguous = false;
           break;
         }
       }
     }
-    //
Only contiguous tensors supported atm. + // Only contiguous source tensors supported atm. if (failed(successStrides) || !isContiguous) return matchFailure(); diff --git a/mlir/lib/Dialect/VectorOps/VectorOps.cpp b/mlir/lib/Dialect/VectorOps/VectorOps.cpp index 8626f241955..215e92d0947 100644 --- a/mlir/lib/Dialect/VectorOps/VectorOps.cpp +++ b/mlir/lib/Dialect/VectorOps/VectorOps.cpp @@ -37,8 +37,6 @@ using namespace mlir::vector; mlir::vector::VectorOpsDialect::VectorOpsDialect(MLIRContext *context) : Dialect(getDialectNamespace(), context) { - addOperations<VectorTransferReadOp, VectorTransferWriteOp, - VectorTypeCastOp>(); addOperations< #define GET_OP_LIST #include "mlir/Dialect/VectorOps/VectorOps.cpp.inc" @@ -195,354 +193,165 @@ static LogicalResult verifyPermutationMap(AffineMap permutationMap, return success(); } -void VectorTransferReadOp::build(Builder *builder, OperationState &result, - VectorType vectorType, Value *srcMemRef, - ArrayRef<Value *> srcIndices, - AffineMap permutationMap, - Optional<Value *> paddingValue) { - result.addOperands(srcMemRef); - result.addOperands(srcIndices); - if (paddingValue) { - result.addOperands({*paddingValue}); - } - result.addAttribute(getPermutationMapAttrName(), - AffineMapAttr::get(permutationMap)); - result.addTypes(vectorType); -} - -auto VectorTransferReadOp::getIndices() -> operand_range { - auto begin = getOperation()->operand_begin() + Offsets::FirstIndexOffset; - auto end = begin + getMemRefType().getRank(); - return {begin, end}; -} - -Optional<Value *> VectorTransferReadOp::getPaddingValue() { - auto memRefRank = getMemRefType().getRank(); - if (getNumOperands() <= Offsets::FirstIndexOffset + memRefRank) { - return None; - } - return Optional<Value *>(getOperand(Offsets::FirstIndexOffset + memRefRank)); -} - -AffineMap VectorTransferReadOp::getPermutationMap() { - return getAttrOfType<AffineMapAttr>(getPermutationMapAttrName()).getValue(); -} - -void VectorTransferReadOp::print(OpAsmPrinter &p) { - p << getOperationName() << " "; - p.printOperand(getMemRef()); +static void print(OpAsmPrinter &p, VectorTransferReadOp op) { + p << op.getOperationName() << " "; + p.printOperand(op.memref()); p << "["; - p.printOperands(getIndices()); - p << "]"; - auto optionalPaddingValue = getPaddingValue(); - if (optionalPaddingValue) { - p << ", ("; - p.printOperand(*optionalPaddingValue); - p << ")"; - } - p.printOptionalAttrDict(getAttrs()); - p << " : " << getMemRefType(); - p << ", " << getResultType(); + p.printOperands(op.indices()); + p << "], "; + p.printOperand(op.padding()); + p << " "; + p.printOptionalAttrDict(op.getAttrs()); + p << " : " << op.getMemRefType(); + p << ", " << op.getVectorType(); } -ParseResult VectorTransferReadOp::parse(OpAsmParser &parser, - OperationState &result) { +ParseResult parseVectorTransferReadOp(OpAsmParser &parser, + OperationState &result) { + llvm::SMLoc typesLoc; OpAsmParser::OperandType memrefInfo; SmallVector<OpAsmParser::OperandType, 8> indexInfo; - SmallVector<OpAsmParser::OperandType, 8> paddingInfo; + OpAsmParser::OperandType paddingInfo; SmallVector<Type, 2> types; - // Parsing with support for optional paddingValue. 
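+  // Parses: %memref `[` %indices `]` `,` %padding attr-dict
+  //         `:` memref-type `,` vector-type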
if (parser.parseOperand(memrefInfo) || parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || - parser.parseTrailingOperandList(paddingInfo, - OpAsmParser::Delimiter::Paren) || + parser.parseComma() || parser.parseOperand(paddingInfo) || parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonTypeList(types)) + parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types)) return failure(); - - // Resolution. if (types.size() != 2) - return parser.emitError(parser.getNameLoc(), "expected 2 types"); - MemRefType memrefType = types[0].dyn_cast<MemRefType>(); - if (!memrefType) - return parser.emitError(parser.getNameLoc(), "memRef type expected"); - VectorType vectorType = types[1].dyn_cast<VectorType>(); - if (!vectorType) - return parser.emitError(parser.getNameLoc(), "vector type expected"); - - // Extract optional paddingValue. - // At this point, indexInfo may contain the optional paddingValue, pop it - // out. - if (static_cast<int64_t>(indexInfo.size()) != memrefType.getRank()) - return parser.emitError(parser.getNameLoc(), - "expected " + Twine(memrefType.getRank()) + - " indices to the memref"); - if (paddingInfo.size() > 1) - return parser.emitError(parser.getNameLoc(), - "expected at most one padding value"); - Type paddingType; - bool hasOptionalPaddingValue = !paddingInfo.empty(); - if (hasOptionalPaddingValue) { - paddingType = vectorType.getElementType(); - } + return parser.emitError(typesLoc, "two types required"); auto indexType = parser.getBuilder().getIndexType(); + MemRefType memRefType = types[0].dyn_cast<MemRefType>(); + if (!memRefType) + return parser.emitError(typesLoc, "memref type required"), failure(); + Type vectorType = types[1]; return failure( - parser.resolveOperand(memrefInfo, memrefType, result.operands) || + parser.resolveOperand(memrefInfo, memRefType, result.operands) || parser.resolveOperands(indexInfo, indexType, result.operands) || - (hasOptionalPaddingValue && - parser.resolveOperand(paddingInfo[0], paddingType, result.operands)) || + parser.resolveOperand(paddingInfo, memRefType.getElementType(), + result.operands) || parser.addTypeToList(vectorType, result.types)); } -LogicalResult VectorTransferReadOp::verify() { - // Consistency of memref type in function type. - if (llvm::empty(getOperands())) { - return emitOpError( - "requires at least a memref operand followed by 'rank' indices"); - } - if (!getMemRef()->getType().isa<MemRefType>()) { - return emitOpError("requires a memref as first operand"); - } - // Consistency of vector type in function type. - if (!getResult()->getType().isa<VectorType>()) { - return emitOpError("should have a vector result type in function type: " - "memref_type<...xelemental_type>, vector_type"); - } +static LogicalResult verify(VectorTransferReadOp op) { // Consistency of elemental types in memref and vector. - MemRefType memrefType = getMemRefType(); - VectorType vectorType = getResultType(); + MemRefType memrefType = op.getMemRefType(); + VectorType vectorType = op.getVectorType(); if (memrefType.getElementType() != vectorType.getElementType()) - return emitOpError( + return op.emitOpError( "requires memref and vector types of the same elemental type"); - // Consistency of number of input types. - auto optionalPaddingValue = getPaddingValue(); - unsigned expectedNumOperands = Offsets::FirstIndexOffset + - memrefType.getRank() + - (optionalPaddingValue ? 1 : 0); - // Checks on the actual operands and their types. 
- if (getNumOperands() != expectedNumOperands) { - return emitOpError("expects ") - << expectedNumOperands << " operands (of which " - << memrefType.getRank() << " indices)"; - } - // Consistency of padding value with vector type. - if (optionalPaddingValue) { - auto paddingValue = *optionalPaddingValue; - auto elementalType = paddingValue->getType(); - if (!VectorType::isValidElementType(elementalType)) { - return emitOpError("requires valid padding vector elemental type"); - } - if (elementalType != vectorType.getElementType()) { - return emitOpError( - "requires formal padding and vector of the same elemental type"); - } - } - // Consistency of indices types. - unsigned numIndices = 0; - for (auto *idx : getIndices()) { - if (!idx->getType().isIndex()) { - return emitOpError( - "index to vector.transfer_read must have 'index' type"); - } - ++numIndices; - } - if (numIndices != memrefType.getRank()) { - return emitOpError("requires at least a memref operand followed by ") - << memrefType.getRank() << " indices"; - } - - // Consistency of AffineMap attribute. - if (!getAttrOfType<AffineMapAttr>(getPermutationMapAttrName())) { - return emitOpError("requires an AffineMapAttr named 'permutation_map'"); - } - auto permutationMap = getPermutationMap(); - if (permutationMap.getNumSymbols() != 0) { - return emitOpError("requires a permutation_map without symbols"); - } - if (permutationMap.getNumInputs() != memrefType.getRank()) { - return emitOpError("requires a permutation_map with input dims of the " - "same rank as the memref type"); - } - if (permutationMap.getNumResults() != vectorType.getRank()) { - return emitOpError("requires a permutation_map with result dims of the " - "same rank as the vector type (") - << permutationMap.getNumResults() << " vs " << vectorType.getRank(); - } + auto elementalType = op.padding()->getType(); + if (!VectorType::isValidElementType(elementalType)) + return op.emitOpError("requires valid padding vector elemental type"); + if (elementalType != vectorType.getElementType()) + return op.emitOpError( + "requires formal padding and vector of the same elemental type"); + if (llvm::size(op.indices()) != memrefType.getRank()) + return op.emitOpError("requires ") << memrefType.getRank() << " indices"; + auto permutationMap = op.permutation_map(); + if (permutationMap.getNumSymbols() != 0) + return op.emitOpError("requires permutation_map without symbols"); + if (permutationMap.getNumInputs() != memrefType.getRank()) + return op.emitOpError("requires a permutation_map with input dims of the " + "same rank as the memref type"); + if (permutationMap.getNumResults() != vectorType.getRank()) + return op.emitOpError("requires a permutation_map with result dims of the " + "same rank as the vector type"); return verifyPermutationMap(permutationMap, - [this](Twine t) { return emitOpError(t); }); + [&op](Twine t) { return op.emitOpError(t); }); } //===----------------------------------------------------------------------===// // VectorTransferWriteOp //===----------------------------------------------------------------------===// -void VectorTransferWriteOp::build(Builder *builder, OperationState &result, - Value *srcVector, Value *dstMemRef, - ArrayRef<Value *> dstIndices, - AffineMap permutationMap) { - result.addOperands({srcVector, dstMemRef}); - result.addOperands(dstIndices); - result.addAttribute(getPermutationMapAttrName(), - AffineMapAttr::get(permutationMap)); -} - -auto VectorTransferWriteOp::getIndices() -> operand_range { - auto begin = 
getOperation()->operand_begin() + Offsets::FirstIndexOffset; - auto end = begin + getMemRefType().getRank(); - return {begin, end}; -} - -AffineMap VectorTransferWriteOp::getPermutationMap() { - return getAttrOfType<AffineMapAttr>(getPermutationMapAttrName()).getValue(); -} - -void VectorTransferWriteOp::print(OpAsmPrinter &p) { - p << getOperationName(); - p << " " << *getVector(); - p << ", " << *getMemRef(); +static void print(OpAsmPrinter &p, VectorTransferWriteOp op) { + p << op.getOperationName() << " " << *op.vector() << ", " << *op.memref(); p << "["; - p.printOperands(getIndices()); + p.printOperands(op.indices()); p << "]"; - p.printOptionalAttrDict(getAttrs()); + p.printOptionalAttrDict(op.getAttrs()); p << " : "; - p.printType(getVectorType()); + p.printType(op.getVectorType()); p << ", "; - p.printType(getMemRefType()); + p.printType(op.getMemRefType()); } -ParseResult VectorTransferWriteOp::parse(OpAsmParser &parser, - OperationState &result) { +ParseResult parseVectorTransferWriteOp(OpAsmParser &parser, + OperationState &result) { + llvm::SMLoc typesLoc; OpAsmParser::OperandType storeValueInfo; - OpAsmParser::OperandType memrefInfo; + OpAsmParser::OperandType memRefInfo; SmallVector<OpAsmParser::OperandType, 4> indexInfo; SmallVector<Type, 2> types; - auto indexType = parser.getBuilder().getIndexType(); if (parser.parseOperand(storeValueInfo) || parser.parseComma() || - parser.parseOperand(memrefInfo) || + parser.parseOperand(memRefInfo) || parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonTypeList(types)) + parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types)) return failure(); - if (types.size() != 2) - return parser.emitError(parser.getNameLoc(), "expected 2 types"); - VectorType vectorType = types[Offsets::VectorOffset].dyn_cast<VectorType>(); - if (!vectorType) - return parser.emitError(parser.getNameLoc(), "vector type expected"); - MemRefType memrefType = types[Offsets::MemRefOffset].dyn_cast<MemRefType>(); - if (!memrefType) - return parser.emitError(parser.getNameLoc(), "memRef type expected"); - + return parser.emitError(typesLoc, "two types required"); + auto indexType = parser.getBuilder().getIndexType(); + Type vectorType = types[0], memRefType = types[1]; return failure( - parser.resolveOperands(storeValueInfo, vectorType, result.operands) || - parser.resolveOperands(memrefInfo, memrefType, result.operands) || + parser.resolveOperand(storeValueInfo, vectorType, result.operands) || + parser.resolveOperand(memRefInfo, memRefType, result.operands) || parser.resolveOperands(indexInfo, indexType, result.operands)); } -LogicalResult VectorTransferWriteOp::verify() { - // Consistency of memref type in function type. - if (llvm::empty(getOperands())) { - return emitOpError( - "requires at least a memref operand followed by 'rank' indices"); - } - if (!getMemRef()->getType().isa<MemRefType>()) { - return emitOpError("requires a memref first operand"); - } - // Consistency of vector type in function type. - if (!getVector()->getType().isa<VectorType>()) { - return emitOpError("should have a vector input type in function type: " - "(vector_type, memref_type [, elemental_type]) -> ()"); - } +static LogicalResult verify(VectorTransferWriteOp op) { // Consistency of elemental types in memref and vector. 
- MemRefType memrefType = getMemRefType(); - VectorType vectorType = getVectorType(); + MemRefType memrefType = op.getMemRefType(); + VectorType vectorType = op.getVectorType(); if (memrefType.getElementType() != vectorType.getElementType()) - return emitOpError( + return op.emitOpError( "requires memref and vector types of the same elemental type"); - // Consistency of number of input types. - unsigned expectedNumOperands = - Offsets::FirstIndexOffset + memrefType.getRank(); - // Checks on the actual operands and their types. - if (getNumOperands() != expectedNumOperands) { - return emitOpError() << "expects " << expectedNumOperands - << " operands (of which " << memrefType.getRank() - << " indices)"; - } - // Consistency of indices types. - unsigned numIndices = 0; - for (auto *idx : getIndices()) { - if (!idx->getType().isIndex()) { - return emitOpError( - "index to vector.transfer_write must have 'index' type"); - } - numIndices++; - } - if (numIndices != memrefType.getRank()) { - return emitOpError("requires at least a memref operand followed by ") - << memrefType.getRank() << " indices"; - } + if (llvm::size(op.indices()) != memrefType.getRank()) + return op.emitOpError("requires ") << memrefType.getRank() << " indices"; // Consistency of AffineMap attribute. - if (!getAttrOfType<AffineMapAttr>(getPermutationMapAttrName())) { - return emitOpError("requires an AffineMapAttr named 'permutation_map'"); - } - auto permutationMap = getPermutationMap(); - if (permutationMap.getNumSymbols() != 0) { - return emitOpError("requires a permutation_map without symbols"); - } - if (permutationMap.getNumInputs() != memrefType.getRank()) { - return emitOpError("requires a permutation_map with input dims of the " - "same rank as the memref type"); - } - if (permutationMap.getNumResults() != vectorType.getRank()) { - return emitOpError("requires a permutation_map with result dims of the " - "same rank as the vector type (") - << permutationMap.getNumResults() << " vs " << vectorType.getRank(); - } + auto permutationMap = op.permutation_map(); + if (permutationMap.getNumSymbols() != 0) + return op.emitOpError("requires a symbol-less permutation_map"); + if (permutationMap.getNumInputs() != memrefType.getRank()) + return op.emitOpError("requires a permutation_map with input dims of the " + "same rank as the memref type: ") + << permutationMap.getNumInputs() << " vs " << memrefType; + if (permutationMap.getNumResults() != vectorType.getRank()) + return op.emitOpError("requires a permutation_map with result dims of the " + "same rank as the vector type.") + << permutationMap.getNumResults() << " vs " << vectorType; return verifyPermutationMap(permutationMap, - [this](Twine t) { return emitOpError(t); }); + [&op](Twine t) { return op.emitOpError(t); }); } //===----------------------------------------------------------------------===// // VectorTypeCastOp //===----------------------------------------------------------------------===// -void VectorTypeCastOp::build(Builder *builder, OperationState &result, - Value *srcVector, Type dstType) { - result.addOperands(srcVector); - result.addTypes(dstType); -} -ParseResult VectorTypeCastOp::parse(OpAsmParser &parser, - OperationState &result) { - OpAsmParser::OperandType operand; - Type srcType, dstType; - return failure(parser.parseOperand(operand) || - parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonType(srcType) || parser.parseComma() || - parser.parseType(dstType) || - parser.addTypeToList(dstType, result.types) || - 
parser.resolveOperand(operand, srcType, result.operands)); +static MemRefType inferVectorTypeCastResultType(MemRefType t) { + return MemRefType::get({}, VectorType::get(t.getShape(), t.getElementType())); } -void VectorTypeCastOp::print(OpAsmPrinter &p) { - p << getOperationName() << ' ' << *getOperand() << " : " - << getOperand()->getType() << ", " << getType(); +void VectorTypeCastOp::build(Builder *builder, OperationState &result, + Value *source) { + result.addOperands(source); + result.addTypes( + inferVectorTypeCastResultType(source->getType().cast<MemRefType>())); } -LogicalResult VectorTypeCastOp::verify() { - auto dstMemrefType = getType().dyn_cast<MemRefType>(); - if (!dstMemrefType) - return emitOpError("expects target type to be a memref type"); - auto dstVectorType = dstMemrefType.getElementType().dyn_cast<VectorType>(); - if (!dstVectorType) - return emitOpError( - "expects vector as an element of the target memref type"); - if (!dstMemrefType.hasStaticShape()) - return emitOpError("does not support dynamic shapes"); - - if (!getOperand()->getType().isa<MemRefType>()) - return emitOpError("expects source type to be a memref type"); +static void print(OpAsmPrinter &p, VectorTypeCastOp &op) { + auto type = op.getOperand()->getType().cast<MemRefType>(); + p << op.getOperationName() << ' ' << *op.memref() << " : " << type << " to " + << inferVectorTypeCastResultType(type); +} +static LogicalResult verify(VectorTypeCastOp &op) { + auto resultType = inferVectorTypeCastResultType(op.getMemRefType()); + if (op.getResultMemRefType() != resultType) + return op.emitOpError("expects result type to be: ") << resultType; return success(); } diff --git a/mlir/lib/Transforms/LowerVectorTransfers.cpp b/mlir/lib/Transforms/LowerVectorTransfers.cpp index c517d74f221..57dd18dac0f 100644 --- a/mlir/lib/Transforms/LowerVectorTransfers.cpp +++ b/mlir/lib/Transforms/LowerVectorTransfers.cpp @@ -113,12 +113,6 @@ struct VectorTransferRewriter : public RewritePattern { {}, 0); } - /// View of tmpMemRefType as one vector, used in vector load/store to tmp - /// buffer. - MemRefType vectorMemRefType(VectorTransferOpTy transfer) const { - return MemRefType::get({1}, transfer.getVectorType(), {}, 0); - } - /// Performs the rewrite. PatternMatchResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override; @@ -139,7 +133,7 @@ void coalesceCopy(VectorTransferOpTy transfer, // the loop order for creating pointwise copies between remote and local // memories. int coalescedIdx = -1; - auto exprs = transfer.getPermutationMap().getResults(); + auto exprs = transfer.permutation_map().getResults(); for (auto en : llvm::enumerate(exprs)) { auto dim = en.value().template dyn_cast<AffineDimExpr>(); if (!dim) { @@ -170,7 +164,7 @@ llvm::SmallVector<edsc::ValueHandle, 8> clip(VectorTransferOpTy transfer, using edsc::intrinsics::select; IndexHandle zero(index_t(0)), one(index_t(1)); - llvm::SmallVector<edsc::ValueHandle, 8> memRefAccess(transfer.getIndices()); + llvm::SmallVector<edsc::ValueHandle, 8> memRefAccess(transfer.indices()); llvm::SmallVector<edsc::ValueHandle, 8> clippedScalarAccessExprs( memRefAccess.size(), edsc::IndexHandle()); @@ -180,7 +174,7 @@ llvm::SmallVector<edsc::ValueHandle, 8> clip(VectorTransferOpTy transfer, ++memRefDim) { // Linear search on a small number of entries. 
int loopIndex = -1; - auto exprs = transfer.getPermutationMap().getResults(); + auto exprs = transfer.permutation_map().getResults(); for (auto en : llvm::enumerate(exprs)) { auto expr = en.value(); auto dim = expr.template dyn_cast<AffineDimExpr>(); @@ -273,9 +267,9 @@ VectorTransferRewriter<VectorTransferReadOp>::matchAndRewrite( // 1. Setup all the captures. ScopedContext scope(rewriter, transfer.getLoc()); - IndexedValue remote(transfer.getMemRef()); - MemRefView view(transfer.getMemRef()); - VectorView vectorView(transfer.getVector()); + IndexedValue remote(transfer.memref()); + MemRefView view(transfer.memref()); + VectorView vectorView(transfer.vector()); SmallVector<IndexHandle, 8> ivs = makeIndexHandles(vectorView.rank()); SmallVector<ValueHandle *, 8> pivs = makeIndexHandlePointers(MutableArrayRef<IndexHandle>(ivs)); @@ -291,12 +285,12 @@ VectorTransferRewriter<VectorTransferReadOp>::matchAndRewrite( // 2. Emit alloc-copy-load-dealloc. ValueHandle tmp = alloc(tmpMemRefType(transfer)); IndexedValue local(tmp); - ValueHandle vec = vector_type_cast(tmp, vectorMemRefType(transfer)); + ValueHandle vec = vector_type_cast(tmp); LoopNestBuilder(pivs, lbs, ubs, steps)([&] { // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). local(ivs) = remote(clip(transfer, view, ivs)); }); - ValueHandle vectorValue = std_load(vec, {constant_index(0)}); + ValueHandle vectorValue = std_load(vec); (dealloc(tmp)); // vexing parse // 3. Propagate. @@ -336,10 +330,10 @@ VectorTransferRewriter<VectorTransferWriteOp>::matchAndRewrite( // 1. Setup all the captures. ScopedContext scope(rewriter, transfer.getLoc()); - IndexedValue remote(transfer.getMemRef()); - MemRefView view(transfer.getMemRef()); - ValueHandle vectorValue(transfer.getVector()); - VectorView vectorView(transfer.getVector()); + IndexedValue remote(transfer.memref()); + MemRefView view(transfer.memref()); + ValueHandle vectorValue(transfer.vector()); + VectorView vectorView(transfer.vector()); SmallVector<IndexHandle, 8> ivs = makeIndexHandles(vectorView.rank()); SmallVector<ValueHandle *, 8> pivs = makeIndexHandlePointers(ivs); coalesceCopy(transfer, &pivs, &vectorView); @@ -354,8 +348,8 @@ VectorTransferRewriter<VectorTransferWriteOp>::matchAndRewrite( // 2. Emit alloc-store-copy-dealloc. ValueHandle tmp = alloc(tmpMemRefType(transfer)); IndexedValue local(tmp); - ValueHandle vec = vector_type_cast(tmp, vectorMemRefType(transfer)); - std_store(vectorValue, vec, {constant_index(0)}); + ValueHandle vec = vector_type_cast(tmp); + std_store(vectorValue, vec); LoopNestBuilder(pivs, lbs, ubs, steps)([&] { // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). 
remote(clip(transfer, view, ivs)) = local(ivs); diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp index a0b60dd3648..06016da5caa 100644 --- a/mlir/lib/Transforms/MaterializeVectors.cpp +++ b/mlir/lib/Transforms/MaterializeVectors.cpp @@ -465,7 +465,7 @@ static AffineMap projectedPermutationMap(VectorTransferOpTy transfer, ++dim; }, superVectorType.getShape(), *optionalRatio); - auto permutationMap = transfer.getPermutationMap(); + auto permutationMap = transfer.permutation_map(); LLVM_DEBUG(permutationMap.print(dbgs() << "\npermutationMap: ")); if (keep.empty()) { return permutationMap; @@ -486,16 +486,16 @@ static Operation *instantiate(OpBuilder b, VectorTransferReadOp read, ArrayRef<unsigned> hwVectorInstance, DenseMap<Value *, Value *> *substitutionsMap) { SmallVector<Value *, 8> indices = - map(makePtrDynCaster<Value>(), read.getIndices()); + map(makePtrDynCaster<Value>(), read.indices()); auto affineIndices = reindexAffineIndices(b, hwVectorType, hwVectorInstance, indices); auto map = projectedPermutationMap(read, hwVectorType); if (!map) { return nullptr; } - auto cloned = b.create<VectorTransferReadOp>(read.getLoc(), hwVectorType, - read.getMemRef(), affineIndices, - map, read.getPaddingValue()); + auto cloned = b.create<VectorTransferReadOp>( + read.getLoc(), hwVectorType, read.memref(), affineIndices, + AffineMapAttr::get(map), read.padding()); return cloned.getOperation(); } @@ -510,14 +510,14 @@ static Operation *instantiate(OpBuilder b, VectorTransferWriteOp write, ArrayRef<unsigned> hwVectorInstance, DenseMap<Value *, Value *> *substitutionsMap) { SmallVector<Value *, 8> indices = - map(makePtrDynCaster<Value>(), write.getIndices()); + map(makePtrDynCaster<Value>(), write.indices()); auto affineIndices = reindexAffineIndices(b, hwVectorType, hwVectorInstance, indices); auto cloned = b.create<VectorTransferWriteOp>( write.getLoc(), - substitute(write.getVector(), hwVectorType, substitutionsMap), - write.getMemRef(), affineIndices, - projectedPermutationMap(write, hwVectorType)); + substitute(write.vector(), hwVectorType, substitutionsMap), + write.memref(), affineIndices, + AffineMapAttr::get(projectedPermutationMap(write, hwVectorType))); return cloned.getOperation(); } diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp index a1e87568745..b3eea35a55f 100644 --- a/mlir/lib/Transforms/Vectorize.cpp +++ b/mlir/lib/Transforms/Vectorize.cpp @@ -35,6 +35,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Support/Functional.h" #include "mlir/Support/LLVM.h" +#include "mlir/Transforms/FoldUtils.h" #include "mlir/Transforms/Passes.h" #include "llvm/ADT/DenseMap.h" @@ -718,6 +719,8 @@ struct VectorizationState { // Checks that the type of `op` is AffineStoreOp and adds it to the terminals // set. void registerTerminal(Operation *op); + // Folder used to factor out constant creation. 
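+  // Not owned: set by vectorizeRootMatch to a folder whose lifetime covers
+  // the vectorization of one root match.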
+  OperationFolder *folder;

 private:
   void registerReplacement(Value *key, Value *value);
@@ -832,7 +835,11 @@ static LogicalResult vectorizeRootOrTerminal(Value *iv,
     LLVM_DEBUG(permutationMap.print(dbgs()));
     auto transfer = b.create<vector::VectorTransferReadOp>(
         opInst->getLoc(), vectorType, memoryOp.getMemRef(),
-        map(makePtrDynCaster<Value>(), indices), permutationMap);
+        map(makePtrDynCaster<Value>(), indices),
+        AffineMapAttr::get(permutationMap),
+        // TODO(b/144455320) add a proper padding value, not just 0.0 : f32
+        state->folder->create<ConstantFloatOp>(
+            b, opInst->getLoc(), llvm::APFloat(0.0f), b.getF32Type()));
     state->registerReplacement(opInst, transfer.getOperation());
   } else {
     state->registerTerminal(opInst);
@@ -1058,7 +1065,8 @@ static Operation *vectorizeOneOperation(Operation *opInst,
     LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ permutationMap: ");
     LLVM_DEBUG(permutationMap.print(dbgs()));
     auto transfer = b.create<vector::VectorTransferWriteOp>(
-        opInst->getLoc(), vectorValue, memRef, indices, permutationMap);
+        opInst->getLoc(), vectorValue, memRef, indices,
+        AffineMapAttr::get(permutationMap));
     auto *res = transfer.getOperation();
     LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ vectorized store: " << *res);
     // "Terminals" (i.e. AffineStoreOps) are erased on the spot.
@@ -1152,8 +1160,10 @@ static LogicalResult vectorizeNonTerminals(VectorizationState *state) {
 static LogicalResult vectorizeRootMatch(NestedMatch m,
                                         VectorizationStrategy *strategy) {
   auto loop = cast<AffineForOp>(m.getMatchedOperation());
+  OperationFolder folder(loop.getContext());
   VectorizationState state;
   state.strategy = strategy;
+  state.folder = &folder;

   // Since patterns are recursive, they can very well intersect.
   // Since we do not want a fully greedy strategy in general, we decouple
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index d4bbc051a8d..ff07f52cf23 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -48,22 +48,18 @@ func @extract_element_from_vec_3d(%arg0: vector<4x3x16xf32>) -> f32 {
 // CHECK: llvm.extractelement %{{.*}}, %{{.*}} : !llvm<"<16 x float>">
 // CHECK: llvm.return %{{.*}} : !llvm.float

-func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref<1xvector<8x8x8xf32>> {
-  %0 = vector.type_cast %arg0: memref<8x8x8xf32>, memref<1xvector<8x8x8xf32>>
-  return %0 : memref<1xvector<8x8x8xf32>>
+func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref<vector<8x8x8xf32>> {
+  %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref<vector<8x8x8xf32>>
+  return %0 : memref<vector<8x8x8xf32>>
 }
 // CHECK-LABEL: vector_type_cast
-// CHECK: llvm.mlir.undef : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64, [1 x i64], [1 x i64] }">
+// CHECK: llvm.mlir.undef : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
 // CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0 : index] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
 // CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm<"float*"> to !llvm<"[8 x [8 x <8 x float>]]*">
-// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0 : index] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64, [1 x i64], [1 x i64] }">
+// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0 : index] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
 // CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1 : index] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
 // CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm<"float*"> to !llvm<"[8 x [8 x <8 x float>]]*">
-// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1 : index] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64, [1 x i64], [1 x i64] }">
+// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1 : index] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
 // CHECK: llvm.mlir.constant(0 : index
-// CHECK: llvm.insertvalue {{.*}}[2 : index] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64, [1 x i64], [1 x i64] }">
-// CHECK: llvm.mlir.constant(1 : index
-// CHECK: llvm.insertvalue {{.*}}[3, 0] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64, [1 x i64], [1 x i64] }">
-// CHECK: llvm.mlir.constant(1 : index
-// CHECK: llvm.insertvalue {{.*}}[4, 0] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64, [1 x i64], [1 x i64] }">
+// CHECK: llvm.insertvalue {{.*}}[2 : index] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
diff --git a/mlir/test/Dialect/VectorOps/invalid.mlir b/mlir/test/Dialect/VectorOps/invalid.mlir
index ca339e7362a..2db4cf53384 100644
--- a/mlir/test/Dialect/VectorOps/invalid.mlir
+++ b/mlir/test/Dialect/VectorOps/invalid.mlir
@@ -96,3 +96,146 @@ func @outerproduct_operand_3_result_type_generic(%arg0: vector<4xf32>, %arg1: ve
   // expected-error@+1 {{expected operand #3 of same type as result type}}
   %1 = "vector.outerproduct" (%arg0, %arg1, %arg2) : (vector<4xf32>, vector<8xf32>, vector<4x16xf32>) -> (vector<4x8xf32>)
 }
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{two types required}}
+  %0 = vector.transfer_read %arg0[%c3, %c3], %cst { permutation_map = ()->(0) } : memref<?x?xf32>
+}
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{requires 2 indices}}
+  %0 = vector.transfer_read %arg0[%c3, %c3, %c3], %cst { permutation_map = ()->(0) } : memref<?x?xf32>, vector<128xf32>
+}
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{requires attribute 'permutation_map'}}
+  %0 = vector.transfer_read %arg0[%c3, %c3], %cst {perm = (d0)->(d0)} : memref<?x?xf32>, vector<128xf32>
+}
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the memref type}}
+  %0 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = (d0)->(d0)} : memref<?x?xf32>, vector<128xf32>
+}
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{requires a permutation_map with result dims of the same rank as the vector type}}
+  %0 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = (d0, d1)->(d0, d1)} : memref<?x?xf32>, vector<128xf32>
+}
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
+  %0 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = (d0, d1)->(d0 + d1)} : memref<?x?xf32>, vector<128xf32>
+}
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
+  %0 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = (d0, d1)->(d0 + 1)} : memref<?x?xf32>, vector<128xf32>
+}
+
+// -----
+
+func @test_vector.transfer_read(%arg0: memref<?x?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  // expected-error@+1 {{requires a permutation_map that is a permutation (found one dim used more than once)}}
+  %0 = vector.transfer_read %arg0[%c3, %c3, %c3], %cst {permutation_map = (d0, d1, d2)->(d0, d0)} : memref<?x?x?xf32>, vector<3x7xf32>
+}
+
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<128 x f32>
+  // expected-error@+1 {{expected 5 operand types but had 4}}
+  %0 = "vector.transfer_write"(%cst, %arg0, %c3, %c3, %c3) {permutation_map = ()->(0)} : (vector<128xf32>, memref<?x?xf32>, index, index) -> ()
+}
+
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<128 x f32>
+  // expected-error@+1 {{requires 2 indices}}
+  vector.transfer_write %cst, %arg0[%c3, %c3, %c3] {permutation_map = ()->(0)} : vector<128xf32>, memref<?x?xf32>
+}
+
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<128 x f32>
+  // expected-error@+1 {{requires attribute 'permutation_map'}}
+  vector.transfer_write %cst, %arg0[%c3, %c3] {perm = (d0)->(d0)} : vector<128xf32>, memref<?x?xf32>
+}
+
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<128 x f32>
+  // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the memref type}}
+  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0)->(d0)} : vector<128xf32>, memref<?x?xf32>
+}
+
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<128 x f32>
+  // expected-error@+1 {{requires a permutation_map with result dims of the same rank as the vector type}}
+  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0, d1)} : vector<128xf32>, memref<?x?xf32>
+}
+
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<128 x f32>
+  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
+  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0 + d1)} : vector<128xf32>, memref<?x?xf32>
+}
+
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<128 x f32>
+  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
+  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0 + 1)} : vector<128xf32>, memref<?x?xf32>
+}
+// -----
+
+func @test_vector.transfer_write(%arg0: memref<?x?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant dense<3.0> : vector<3 x 7 x f32>
+  // expected-error@+1 {{requires a permutation_map that is a permutation (found one dim used more than once)}}
+  vector.transfer_write %cst, %arg0[%c3, %c3, %c3] {permutation_map = (d0, d1, d2)->(d0, d0)} : vector<3x7xf32>, memref<?x?x?xf32>
+}
diff --git a/mlir/test/Dialect/VectorOps/ops.mlir b/mlir/test/Dialect/VectorOps/ops.mlir
index 067345af0d9..77d40f5e84d 100644
--- a/mlir/test/Dialect/VectorOps/ops.mlir
+++ b/mlir/test/Dialect/VectorOps/ops.mlir
@@ -1,5 +1,27 @@
 // RUN: mlir-opt %s | mlir-opt | FileCheck %s

+// CHECK-LABEL: func @vector_transfer_ops(
+func @vector_transfer_ops(%arg0: memref<?x?xf32>) {
+  %c3 = constant 3 : index
+  %cst = constant 3.0 : f32
+  %f0 = constant 0.0 : f32
+  //
+  // CHECK: %0 = vector.transfer_read
+  %0 = vector.transfer_read %arg0[%c3, %c3], %f0 {permutation_map = (d0, d1)->(d0)} : memref<?x?xf32>, vector<128xf32>
+  // CHECK: %1 = vector.transfer_read
+  %1 = vector.transfer_read %arg0[%c3, %c3], %f0 {permutation_map = (d0, d1)->(d1, d0)} : memref<?x?xf32>, vector<3x7xf32>
+  // CHECK: vector.transfer_read
+  %2 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = (d0, d1)->(d0)} : memref<?x?xf32>, vector<128xf32>
+  // CHECK: vector.transfer_read
+  %3 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = (d0, d1)->(d1)} : memref<?x?xf32>, vector<128xf32>
+  //
+  // CHECK: vector.transfer_write
+  vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0)} : vector<128xf32>, memref<?x?xf32>
+  // CHECK: vector.transfer_write
+  vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d1, d0)} : vector<3x7xf32>, memref<?x?xf32>
+  return
+}
+
 // CHECK-LABEL: extractelement
 func @extractelement(%arg0: vector<4x8x16xf32>) -> (vector<8x16xf32>, vector<16xf32>, f32) {
 // CHECK: vector.extractelement {{.*}}[3 : i32] : vector<4x8x16xf32>
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir
index 96df40202a3..417c8723aa4 100644
--- a/mlir/test/IR/core-ops.mlir
+++ b/mlir/test/IR/core-ops.mlir
@@ -7,9 +7,6 @@
 // CHECK: #map0 = (d0) -> (d0 + 1)
 // CHECK: #map1 = ()[s0] -> (s0 + 1)

-// CHECK-DAG: #[[map_proj_d0d1_d0:map[0-9]+]] = (d0, d1) -> (d0)
-// CHECK-DAG: #[[map_proj_d0d1_d1:map[0-9]+]] = (d0, d1) -> (d1)
-// CHECK-DAG: #[[map_proj_d0d1_d1d0:map[0-9]+]] = (d0, d1) -> (d1, d0)
 // CHECK-DAG: #[[VIEW_MAP1:map[0-9]+]] = (d0, d1) -> (d0 * 4 + d1)
 // CHECK-DAG: #[[VIEW_MAP2:map[0-9]+]] = (d0, d1)[s0, s1] -> (d0 * s1 + d1 + s0)

@@ -564,26 +561,6 @@ func @test_splat_op(%s : f32) {
   return
 }

-// CHECK-LABEL: func @test_vector.transfer_ops(%arg0
-func @test_vector.transfer_ops(%arg0: memref<?x?xf32>) {
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // CHECK: %0 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = #[[map_proj_d0d1_d0]]} : memref<?x?xf32>, vector<128xf32>
-  %0 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0)} : memref<?x?xf32>, vector<128xf32>
-  // CHECK: %1 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = #[[map_proj_d0d1_d1d0]]} : memref<?x?xf32>, vector<3x7xf32>
-  %1 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d1, d0)} : memref<?x?xf32>, vector<3x7xf32>
-  // CHECK: %2 = vector.transfer_read %arg0[%c3, %c3], (%cst) {permutation_map = #[[map_proj_d0d1_d0]]} : memref<?x?xf32>, vector<128xf32>
-  %2 = vector.transfer_read %arg0[%c3, %c3], (%cst) {permutation_map = (d0, d1)->(d0)} : memref<?x?xf32>, vector<128xf32>
-  // CHECK: %3 = vector.transfer_read %arg0[%c3, %c3], (%cst) {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
-  %3 = vector.transfer_read %arg0[%c3, %c3], (%cst) {permutation_map = (d0, d1)->(d1)} : memref<?x?xf32>, vector<128xf32>
-  //
-  // CHECK: vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = #[[map_proj_d0d1_d0]]} : vector<128xf32>, memref<?x?xf32>
-  vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0)} : vector<128xf32>, memref<?x?xf32>
-  // CHECK: vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = #[[map_proj_d0d1_d1d0]]} : vector<3x7xf32>, memref<?x?xf32>
-  vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d1, d0)} : vector<3x7xf32>, memref<?x?xf32>
-  return
-}
-
 // CHECK-LABEL: func @tensor_load_store
 func @tensor_load_store(%0 : memref<4x4xi32>) {
   // CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF:.*]] : memref<4x4xi32>
diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir
index 9c1807807c3..74dd4129422 100644
--- a/mlir/test/IR/invalid-ops.mlir
+++ b/mlir/test/IR/invalid-ops.mlir
@@ -297,185 +297,6 @@ func @func_with_ops(i1, tensor<42xi32>, tensor<?xi32>) {

 // -----

-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{expected 2 types}}
-  %0 = vector.transfer_read %arg0[%c3, %c3] : memref<?x?xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{expected 2 indices to the memref}}
-  %0 = vector.transfer_read %arg0[%c3, %c3, %c3] : memref<?x?xf32>, vector<128xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{requires an AffineMapAttr named 'permutation_map'}}
-  %0 = vector.transfer_read %arg0[%c3, %c3] : memref<?x?xf32>, vector<128xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{requires an AffineMapAttr named 'permutation_map'}}
-  %0 = vector.transfer_read %arg0[%c3, %c3] {perm = (d0)->(d0)} : memref<?x?xf32>, vector<128xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the memref type}}
-  %0 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = (d0)->(d0)} : memref<?x?xf32>, vector<128xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{requires a permutation_map with result dims of the same rank as the vector type}}
-  %0 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0, d1)} : memref<?x?xf32>, vector<128xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
-  %0 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0 + d1)} : memref<?x?xf32>, vector<128xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
-  %0 = vector.transfer_read %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0 + 1)} : memref<?x?xf32>, vector<128xf32>
-}
-
-// -----
-
-func @test_vector.transfer_read(memref<?x?x?xf32>) {
-^bb0(%arg0: memref<?x?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant 3.0 : f32
-  // expected-error@+1 {{requires a permutation_map that is a permutation (found one dim used more than once)}}
-  %0 = vector.transfer_read %arg0[%c3, %c3, %c3] {permutation_map = (d0, d1, d2)->(d0, d0)} : memref<?x?x?xf32>, vector<3x7xf32>
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{expected 5 operand types but had 4}}
-  %0 = "vector.transfer_write"(%cst, %arg0, %c3, %c3, %c3) : (vector<128xf32>, memref<?x?xf32>, index, index) -> ()
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{expects 4 operands (of which 2 indices)}}
-  vector.transfer_write %cst, %arg0[%c3, %c3, %c3] : vector<128xf32>, memref<?x?xf32>
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{requires an AffineMapAttr named 'permutation_map'}}
-  vector.transfer_write %cst, %arg0[%c3, %c3] : vector<128xf32>, memref<?x?xf32>
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{requires an AffineMapAttr named 'permutation_map'}}
-  vector.transfer_write %cst, %arg0[%c3, %c3] {perm = (d0)->(d0)} : vector<128xf32>, memref<?x?xf32>
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{requires a permutation_map with input dims of the same rank as the memref type}}
-  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0)->(d0)} : vector<128xf32>, memref<?x?xf32>
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{requires a permutation_map with result dims of the same rank as the vector type}}
-  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0, d1)} : vector<128xf32>, memref<?x?xf32>
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
-  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0 + d1)} : vector<128xf32>, memref<?x?xf32>
-}
-
-// -----
-
-func @test_vector.transfer_write(memref<?x?xf32>) {
-^bb0(%arg0: memref<?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<128 x f32>
-  // expected-error@+1 {{requires a projected permutation_map (at most one dim or the zero constant can appear in each result)}}
-  vector.transfer_write %cst, %arg0[%c3, %c3] {permutation_map = (d0, d1)->(d0 + 1)} : vector<128xf32>, memref<?x?xf32>
-}
-// -----
-
-func @test_vector.transfer_write(memref<?x?x?xf32>) {
-^bb0(%arg0: memref<?x?x?xf32>):
-  %c3 = constant 3 : index
-  %cst = constant dense<3.0> : vector<3 x 7 x f32>
-  // expected-error@+1 {{requires a permutation_map that is a permutation (found one dim used more than once)}}
-  vector.transfer_write %cst, %arg0[%c3, %c3, %c3] {permutation_map = (d0, d1, d2)->(d0, d0)} : vector<3x7xf32>, memref<?x?x?xf32>
-}
-
-// -----
-
 func @invalid_select_shape(%cond : i1, %idx : () -> ()) {
   // expected-error@+1 {{expected type with valid i1 shape}}
   %sel = select %cond, %idx, %idx : () -> ()
diff --git a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir
index 31f8bf60b07..f9ca0d0da86 100644
--- a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir
+++ b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir
@@ -5,16 +5,17 @@
 // CHECK-LABEL: func @materialize_read_1d() {
 func @materialize_read_1d() {
+  %f0 = constant 0.0: f32
   %A = alloc () : memref<7x42xf32>
   affine.for %i0 = 0 to 7 step 4 {
     affine.for %i1 = 0 to 42 step 4 {
-      %f1 = vector.transfer_read %A[%i0, %i1] {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>
+      %f1 = vector.transfer_read %A[%i0, %i1], %f0 {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>
       %ip1 = affine.apply (d0) -> (d0 + 1) (%i1)
-      %f2 = vector.transfer_read %A[%i0, %ip1] {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>
+      %f2 = vector.transfer_read %A[%i0, %ip1], %f0 {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>
       %ip2 = affine.apply (d0) -> (d0 + 2) (%i1)
-      %f3 = vector.transfer_read %A[%i0, %ip2] {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>
+      %f3 = vector.transfer_read %A[%i0, %ip2], %f0 {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>
       %ip3 = affine.apply (d0) -> (d0 + 3) (%i1)
-      %f4 = vector.transfer_read %A[%i0, %ip3] {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>
+      %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = (d0, d1) -> (d0)} : memref<7x42xf32>, vector<4xf32>

       // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds.
       // CHECK: {{.*}} = select
       // CHECK: %[[FILTERED1:.*]] = select
@@ -28,15 +29,16 @@ func @materialize_read_1d() {

 // CHECK-LABEL: func @materialize_read_1d_partially_specialized
 func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
+  %f0 = constant 0.0: f32
   %A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
   affine.for %i0 = 0 to 7 {
     affine.for %i1 = 0 to %dyn1 {
       affine.for %i2 = 0 to %dyn2 {
         affine.for %i3 = 0 to 42 step 2 {
           affine.for %i4 = 0 to %dyn4 {
-            %f1 = vector.transfer_read %A[%i0, %i1, %i2, %i3, %i4] {permutation_map = (d0, d1, d2, d3, d4) -> (d3)} : memref<7x?x?x42x?xf32>, vector<4xf32>
+            %f1 = vector.transfer_read %A[%i0, %i1, %i2, %i3, %i4], %f0 {permutation_map = (d0, d1, d2, d3, d4) -> (d3)} : memref<7x?x?x42x?xf32>, vector<4xf32>
             %i3p1 = affine.apply (d0) -> (d0 + 1) (%i3)
-            %f2 = vector.transfer_read %A[%i0, %i1, %i2, %i3p1, %i4] {permutation_map = (d0, d1, d2, d3, d4) -> (d3)} : memref<7x?x?x42x?xf32>, vector<4xf32>
+            %f2 = vector.transfer_read %A[%i0, %i1, %i2, %i3p1, %i4], %f0 {permutation_map = (d0, d1, d2, d3, d4) -> (d3)} : memref<7x?x?x42x?xf32>, vector<4xf32>
           }
         }
       }
@@ -53,6 +55,7 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d

 // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
 func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
+  %f0 = constant 0.0: f32
   // CHECK-DAG: %[[C0:.*]] = constant 0 : index
   // CHECK-DAG: %[[C1:.*]] = constant 1 : index
   // CHECK-DAG: %[[C3:.*]] = constant 3 : index
@@ -68,7 +71,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
   // CHECK-NEXT: %[[D2:.*]] = dim %{{.*}}, 2 : memref<?x?x?x?xf32>
   // CHECK-NEXT: %[[D3:.*]] = dim %{{.*}}, 3 : memref<?x?x?x?xf32>
   // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
-  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
+  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast %[[ALLOC]] : memref<5x4x3xf32>
   // CHECK-NEXT: loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
   // CHECK-NEXT: loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
   // CHECK-NEXT: loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
@@ -103,7 +106,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
   // CHECK-NEXT: }
   // CHECK-NEXT: }
   // CHECK-NEXT: }
-  // CHECK: {{.*}} = load %[[VECTOR_VIEW]][{{.*}}] : memref<1xvector<5x4x3xf32>>
+  // CHECK: {{.*}} = load %[[VECTOR_VIEW]][] : memref<vector<5x4x3xf32>>
   // CHECK-NEXT: dealloc %[[ALLOC]] : memref<5x4x3xf32>
   // CHECK-NEXT: }
   // CHECK-NEXT: }
@@ -120,7 +123,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
   affine.for %i1 = 0 to %N {
     affine.for %i2 = 0 to %O {
       affine.for %i3 = 0 to %P step 5 {
-        %f = vector.transfer_read %A[%i0, %i1, %i2, %i3] {permutation_map = (d0, d1, d2, d3) -> (d3, 0, d0)} : memref<?x?x?x?xf32>, vector<5x4x3xf32>
+        %f = vector.transfer_read %A[%i0, %i1, %i2, %i3], %f0 {permutation_map = (d0, d1, d2, d3) -> (d3, 0, d0)} : memref<?x?x?x?xf32>, vector<5x4x3xf32>
       }
     }
   }
@@ -146,8 +149,8 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
   // CHECK-NEXT: %[[D2:.*]] = dim %{{.*}}, 2 : memref<?x?x?x?xf32>
   // CHECK-NEXT: %[[D3:.*]] = dim %{{.*}}, 3 : memref<?x?x?x?xf32>
   // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
-  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
-  // CHECK: store %{{.*}}, {{.*}} : memref<1xvector<5x4x3xf32>>
+  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32>
+  // CHECK: store %{{.*}}, {{.*}} : memref<vector<5x4x3xf32>>
   // CHECK-NEXT: loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
   // CHECK-NEXT: loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
   // CHECK-NEXT: loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
diff --git a/mlir/test/Transforms/Vectorize/vectorize_1d.mlir b/mlir/test/Transforms/Vectorize/vectorize_1d.mlir
index afab2303c08..83f783c3aef 100644
--- a/mlir/test/Transforms/Vectorize/vectorize_1d.mlir
+++ b/mlir/test/Transforms/Vectorize/vectorize_1d.mlir
@@ -13,6 +13,7 @@
 // Maps introduced to vectorize fastest varying memory index.
 // CHECK-LABEL: func @vec1d_1
 func @vec1d_1(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32
 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index
 // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref<?x?xf32>
 // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref<?x?xf32>
@@ -25,7 +26,7 @@ func @vec1d_1(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK: for {{.*}} step 128
 // CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]])
 // CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]])
-// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
     %a0 = affine.load %A[%cst0, %cst0] : memref<?x?xf32>
   }
@@ -34,6 +35,7 @@

 // CHECK-LABEL: func @vec1d_2
 func @vec1d_2(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32
 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index
 // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref<?x?xf32>
 // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref<?x?xf32>
@@ -44,7 +46,7 @@ func @vec1d_2(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
   %cst0 = constant 0 : index
 //
 // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
-// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i3 = 0 to %M { // vectorized
     %a3 = affine.load %A[%cst0, %i3] : memref<?x?xf32>
   }
@@ -53,6 +55,7 @@

 // CHECK-LABEL: func @vec1d_3
 func @vec1d_3(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32
 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index
 // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %arg0, 0 : memref<?x?xf32>
 // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %arg0, 1 : memref<?x?xf32>
@@ -66,7 +69,7 @@ func @vec1d_3(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK-NEXT: for [[IV9:%[arg0-9]*]] = 0 to [[ARG_N]] {
 // CHECK-NEXT: %[[APP9_0:[0-9]+]] = affine.apply {{.*}}([[IV9]], [[IV8]])
 // CHECK-NEXT: %[[APP9_1:[0-9]+]] = affine.apply {{.*}}([[IV9]], [[IV8]])
-// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%[[APP9_0]], %[[APP9_1]]] {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%[[APP9_0]], %[[APP9_1]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i8 = 0 to %M { // vectorized
     affine.for %i9 = 0 to %N {
       %a9 = affine.load %A[%i9, %i8 + %i9] : memref<?x?xf32>
@@ -100,8 +103,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
   }
   affine.for %i4 = 0 to %M {
     affine.for %i5 = 0 to %N {
-      // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}] {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
-      // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}] {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
+      // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
+      // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
       // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32>
       // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<128xf32>
       // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<128xf32>
@@ -165,6 +168,7 @@ func @vec_rejected_2(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {

 // CHECK-LABEL: func @vec_rejected_3
 func @vec_rejected_3(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32
 // CHECK-DAG: [[C0:%[a-z0-9_]+]] = constant 0 : index
 // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref<?x?xf32>
 // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref<?x?xf32>
@@ -176,7 +180,7 @@ func @vec_rejected_3(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 //
 // CHECK:for [[IV4:%[arg0-9]+]] = 0 to [[ARG_M]] step 128 {
 // CHECK-NEXT: for [[IV5:%[arg0-9]*]] = 0 to [[ARG_N]] {
-// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i4 = 0 to %M { // vectorized
     affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
      %a5 = affine.load %A[%i5, %i4] : memref<?x?xf32>
@@ -273,6 +277,7 @@ func @vec_rejected_7(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {

 // CHECK-LABEL: func @vec_rejected_8
 func @vec_rejected_8(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32
 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index
 // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref<?x?xf32>
 // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref<?x?xf32>
@@ -286,7 +291,7 @@ func @vec_rejected_8(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
 // CHECK: %{{.*}} = affine.apply #map0(%{{.*}})
 // CHECK: %{{.*}} = affine.apply #map0(%{{.*}})
-// CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %{{.*}} in DFS post-order prevents vectorizing %{{.*}}
     affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
       %a18 = affine.load %A[%cst0, %cst0] : memref<?x?xf32>
@@ -297,6 +302,7 @@ func @vec_rejected_8(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {

 // CHECK-LABEL: func @vec_rejected_9
 func @vec_rejected_9(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32
 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index
 // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref<?x?xf32>
 // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref<?x?xf32>
@@ -310,7 +316,7 @@ func @vec_rejected_9(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
 // CHECK: %{{.*}} = affine.apply #map0(%{{.*}})
 // CHECK-NEXT: %{{.*}} = affine.apply #map0(%{{.*}})
-// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %{{.*}}
     affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
       %a18 = affine.load %A[%cst0, %cst0] : memref<?x?xf32>
diff --git a/mlir/test/Transforms/Vectorize/vectorize_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_2d.mlir
index 6526d6b757f..a7553092505 100644
--- a/mlir/test/Transforms/Vectorize/vectorize_2d.mlir
+++ b/mlir/test/Transforms/Vectorize/vectorize_2d.mlir
@@ -69,8 +69,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
   }
   affine.for %i4 = 0 to %M {
     affine.for %i5 = 0 to %N {
-      // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}] {permutation_map = #[[map_id2]]} : memref<?x?xf32>, vector<32x256xf32>
-      // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}] {permutation_map = #[[map_id2]]} : memref<?x?xf32>, vector<32x256xf32>
+      // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_id2]]} : memref<?x?xf32>, vector<32x256xf32>
+      // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_id2]]} : memref<?x?xf32>, vector<32x256xf32>
       // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
       // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
      // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<32x256xf32>
@@ -120,10 +120,10 @@ func @vectorize_matmul(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: me
   // VECT: affine.for %[[I2:.*]] = #[[map_id1]](%[[C0]]) to #[[map_id1]](%[[M]]) step 4 {
   // VECT-NEXT: affine.for %[[I3:.*]] = #[[map_id1]](%[[C0]]) to #[[map_id1]](%[[N]]) step 8 {
   // VECT-NEXT: affine.for %[[I4:.*]] = #map5(%[[C0]]) to #[[map_id1]](%[[K]]) {
-  // VECT-NEXT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]] {permutation_map = #[[map_proj_d0d1_zerod1]]} : memref<?x?xf32>, vector<4x8xf32>
-  // VECT-NEXT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]] {permutation_map = #[[map_proj_d0d1_d0zero]]} : memref<?x?xf32>, vector<4x8xf32>
+  // VECT-NEXT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_zerod1]]} : memref<?x?xf32>, vector<4x8xf32>
+  // VECT-NEXT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d0zero]]} : memref<?x?xf32>, vector<4x8xf32>
   // VECT-NEXT: %[[C:.*]] = mulf %[[B]], %[[A]] : vector<4x8xf32>
-  // VECT-NEXT: %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]] {permutation_map = #[[map_id2]]} : memref<?x?xf32>, vector<4x8xf32>
+  // VECT-NEXT: %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} {permutation_map = #[[map_id2]]} : memref<?x?xf32>, vector<4x8xf32>
   // VECT-NEXT: %[[E:.*]] = addf %[[D]], %[[C]] : vector<4x8xf32>
   // VECT-NEXT: vector.transfer_write %[[E]], %{{.*}}[%[[I2]], %[[I3]]] {permutation_map = #[[map_id2]]} : vector<4x8xf32>, memref<?x?xf32>
   affine.for %i2 = (d0) -> (d0)(%c0) to (d0) -> (d0)(%M) {
diff --git a/mlir/test/Transforms/Vectorize/vectorize_3d.mlir b/mlir/test/Transforms/Vectorize/vectorize_3d.mlir
index 797e58e0d40..df60806155a 100644
--- a/mlir/test/Transforms/Vectorize/vectorize_3d.mlir
+++ b/mlir/test/Transforms/Vectorize/vectorize_3d.mlir
@@ -12,7 +12,7 @@ func @vec3d(%A : memref<?x?x?xf32>) {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 64 {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
-  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d0d1d2]]} : memref<?x?x?xf32>, vector<32x64x256xf32>
+  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d0d1d2]]} : memref<?x?x?xf32>, vector<32x64x256xf32>
   affine.for %t0 = 0 to %0 {
     affine.for %t1 = 0 to %0 {
      affine.for %i0 = 0 to %0 {
diff --git a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir
index ded8dfad815..e398144a222 100644
--- a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir
+++ b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir
@@ -10,7 +10,7 @@ func @vec2d(%A : memref<?x?x?xf32>) {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256
-  // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d0d2]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d0d2]]} : memref<?x?x?xf32>, vector<32x256xf32>
   affine.for %i0 = 0 to %M {
     affine.for %i1 = 0 to %N {
       affine.for %i2 = 0 to %P {
diff --git a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir
index 36b1a4d4044..d2de5f8d159 100644
--- a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir
+++ b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir
@@ -22,7 +22,7 @@ func @vec2d(%A : memref<?x?x?xf32>) {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-  // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
   affine.for %i3 = 0 to %M {
     affine.for %i4 = 0 to %N {
       affine.for %i5 = 0 to %P {
@@ -40,12 +40,12 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
-  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
   affine.for %i0 = 0 to %0 {
     affine.for %i1 = 0 to %1 {
       affine.for %i2 = 0 to %2 {
diff --git a/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir
index 4f61a26b4a8..765cd07ce7d 100644
--- a/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir
+++ b/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir
@@ -22,7 +22,7 @@ func @vec2d(%A : memref<?x?x?xf32>) {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256
-  // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
   affine.for %i3 = 0 to %M {
     affine.for %i4 = 0 to %N {
       affine.for %i5 = 0 to %P {
@@ -40,12 +40,12 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
-  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
-  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
+  // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
   affine.for %i0 = 0 to %0 {
     affine.for %i1 = 0 to %1 {
       affine.for %i2 = 0 to %2 {
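For quick reference, here is the updated surface syntax in one place, as exercised by the tests in this patch. This is a minimal sketch: the function name and SSA value names are illustrative, while the operation forms are taken from `ops.mlir` and `vector-to-llvm.mlir` above. `vector.transfer_read` now takes a mandatory padding operand after the indices, `permutation_map` is a required attribute on both transfer ops, and `vector.type_cast` uses the `to` keyword and yields a 0-d `memref<vector<...>>`:

```mlir
func @vector_transfer_syntax_sketch(%A : memref<?x?xf32>, %M : memref<8x8x8xf32>) {
  %c3 = constant 3 : index
  %f0 = constant 0.0 : f32
  // The padding value %f0 is now a required trailing operand on transfer_read.
  %v = vector.transfer_read %A[%c3, %c3], %f0 {permutation_map = (d0, d1)->(d1, d0)} : memref<?x?xf32>, vector<3x7xf32>
  // transfer_write keeps its operand list; permutation_map remains mandatory.
  vector.transfer_write %v, %A[%c3, %c3] {permutation_map = (d0, d1)->(d1, d0)} : vector<3x7xf32>, memref<?x?xf32>
  // type_cast now uses `to` and produces a 0-d memref of vector.
  %view = vector.type_cast %M : memref<8x8x8xf32> to memref<vector<8x8x8xf32>>
  return
}
```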