diff options
Diffstat (limited to 'mlir/lib/Transforms/MaterializeVectors.cpp')
| -rw-r--r-- | mlir/lib/Transforms/MaterializeVectors.cpp | 124 |
1 files changed, 121 insertions, 3 deletions
diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp index 27f157c9234..0d7d0db2b20 100644 --- a/mlir/lib/Transforms/MaterializeVectors.cpp +++ b/mlir/lib/Transforms/MaterializeVectors.cpp @@ -82,6 +82,73 @@ /// operations and builds the slice scoped the innermost loop enclosing the /// current vector_transfer_write. These assumptions and the implementation /// details are subject to revision in the future. +/// +/// Example +/// ======== +/// In the following, the single vector_transfer_write op operates on a +/// vector<4x4x4xf32>. Let's assume the HW supports vector<4x4xf32>. +/// Materialization is achieved by instantiating each occurrence of the leading +/// dimension of vector<4x4x4xf32> into a vector<4x4xf32>. +/// The program transformation that implements this instantiation is a +/// multi-loop unroll-and-jam (it can be partial or full depending on the ratio +/// of super-vector shape to HW-vector shape). +/// +/// As a simple case, the following: +/// ```mlir +/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { +/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> +/// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : +/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 { +/// for %i1 = 0 to %N step 4 { +/// for %i2 = 0 to %O { +/// for %i3 = 0 to %P step 4 { +/// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 +/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : +/// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, +/// index, index, index, index +/// }}}} +/// return +/// } +/// ``` +/// +/// is instantiated by unroll-and-jam (just unroll in this case) into: +/// +/// ```mlir +/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { +/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> +/// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32> +/// for %i0 = 0 to %arg0 step 4 { +/// for %i1 = 0 to %arg1 step 4 { +/// for %i2 = 0 to %arg2 { +/// for %i3 = 0 to %arg3 step 4 { +/// %1 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3) +/// (%i0, %i1, %i2, %i3) +/// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3 +/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} : +/// vector<4x4xf32>, memref<?x?x?x?xf32>, +/// index, index, index, index +/// %2 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3 + 1) +/// (%i0, %i1, %i2, %i3) +/// vector_transfer_write {{.*}}, %0, %2#0, %2#1, %2#2, %2#3 +/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} : +/// vector<4x4xf32>, memref<?x?x?x?xf32>, +/// index, index, index, index +/// %3 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3 + 2) +/// (%i0, %i1, %i2, %i3) +/// vector_transfer_write {{.*}}, %0, %3#0, %3#1, %3#2, %3#3 +/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} : +/// vector<4x4xf32>, memref<?x?x?x?xf32>, +/// index, index, index, index +/// %4 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3 + 3) +/// (%i0, %i1, %i2, %i3) +/// vector_transfer_write {{.*}}, %0, %4#0, %4#1, %4#2, %4#3 +/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} : +/// vector<4x4xf32>, memref<?x?x?x?xf32>, +/// index, index, index, index +/// }}}} +/// return +/// } +/// ``` using llvm::dbgs; using llvm::DenseSet; @@ -333,6 +400,58 @@ instantiate(MLFuncBuilder *b, OperationStmt *opStmt, VectorType superVectorType, materializeAttributes(opStmt, superVectorType, hwVectorType)); } +/// Computes the permutationMap required for a VectorTransferOp from the memref +/// to the `hwVectorType`. +/// This is achieved by returning the projection of the permutationMap along the +/// dimensions of the super-vector type that remain in the hwVectorType. +/// In particular, if a dimension is fully instantiated (i.e. unrolled) then it +/// is projected out in the final result. +template <typename VectorTransferOpTy> +static AffineMap projectedPermutationMap(VectorTransferOpTy *transfer, + VectorType hwVectorType) { + static_assert( + std::is_same<VectorTransferOpTy, VectorTransferReadOp>::value || + std::is_same<VectorTransferOpTy, VectorTransferWriteOp>::value, + "Must be called on a VectorTransferOp"); + auto superVectorType = transfer->getVectorType(); + auto optionalRatio = shapeRatio(superVectorType, hwVectorType); + assert(optionalRatio && + (optionalRatio->size() == superVectorType.getShape().size()) && + "Shape and ratio not of the same size"); + unsigned dim = 0; + SmallVector<AffineExpr, 4> keep; + MLIRContext *context = transfer->getOperation()->getContext(); + functional::zipApply( + [&dim, &keep, context](int shape, int ratio) { + assert(shape >= ratio && "shape dim must be greater than ratio dim"); + if (shape != ratio) { + // HW vector is not full instantiated along this dim, keep it. + keep.push_back(getAffineDimExpr(dim, context)); + } + ++dim; + }, + superVectorType.getShape(), *optionalRatio); + auto projectionMap = AffineMap::get(optionalRatio->size(), 0, keep, {}); + (void)projectionMap; + // No seemingly simple way to compose 2 AffineMap except going through SSA + // values... Punting for now and will resolve in the next CL. + // + // return projectionMap.compose(transfer->getPermutationMap()); + + // Still, we may need to drop a few dims to pass verification, so hack this in + // for now. + auto map = transfer->getPermutationMap(); + auto exprs = map.getResults(); + assert(exprs.size() >= keep.size()); + unsigned diff = exprs.size() - keep.size(); + SmallVector<AffineExpr, 4> projectedExprs(exprs.begin() + diff, exprs.end()); + auto res = AffineMap::get(map.getNumInputs(), 0, projectedExprs, {}); + LLVM_DEBUG(projectionMap.print(dbgs() << "\nProjectionMap: ")); + LLVM_DEBUG(map.print(dbgs() << "\nOriginal: ")); + LLVM_DEBUG(res.print(dbgs() << "\nTemporarily hacked projection: ")); + return res; +} + /// Creates an instantiated version of `read` for the instance of /// `hwVectorInstance` when lowering from a super-vector type to /// `hwVectorType`. `hwVectorInstance` represents one particular instance of @@ -349,8 +468,7 @@ instantiate(MLFuncBuilder *b, VectorTransferReadOp *read, reindexAffineIndices(b, hwVectorType, hwVectorInstance, indices); auto cloned = b->create<VectorTransferReadOp>( read->getLoc(), hwVectorType, read->getMemRef(), affineIndices, - makePermutationMap(read->getMemRefType(), hwVectorType), - read->getPaddingValue()); + projectedPermutationMap(read, hwVectorType), read->getPaddingValue()); return cast<OperationStmt>(cloned->getOperation()); } @@ -371,7 +489,7 @@ instantiate(MLFuncBuilder *b, VectorTransferWriteOp *write, auto cloned = b->create<VectorTransferWriteOp>( write->getLoc(), substitute(write->getVector(), *substitutionsMap), write->getMemRef(), affineIndices, - makePermutationMap(write->getMemRefType(), hwVectorType)); + projectedPermutationMap(write, hwVectorType)); return cast<OperationStmt>(cloned->getOperation()); } |

