summaryrefslogtreecommitdiffstats
path: root/mlir/lib/Transforms/MaterializeVectors.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mlir/lib/Transforms/MaterializeVectors.cpp')
-rw-r--r--mlir/lib/Transforms/MaterializeVectors.cpp124
1 files changed, 121 insertions, 3 deletions
diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp
index 27f157c9234..0d7d0db2b20 100644
--- a/mlir/lib/Transforms/MaterializeVectors.cpp
+++ b/mlir/lib/Transforms/MaterializeVectors.cpp
@@ -82,6 +82,73 @@
/// operations and builds the slice scoped the innermost loop enclosing the
/// current vector_transfer_write. These assumptions and the implementation
/// details are subject to revision in the future.
+///
+/// Example
+/// ========
+/// In the following, the single vector_transfer_write op operates on a
+/// vector<4x4x4xf32>. Let's assume the HW supports vector<4x4xf32>.
+/// Materialization is achieved by instantiating each occurrence of the leading
+/// dimension of vector<4x4x4xf32> into a vector<4x4xf32>.
+/// The program transformation that implements this instantiation is a
+/// multi-loop unroll-and-jam (it can be partial or full depending on the ratio
+/// of super-vector shape to HW-vector shape).
+///
+/// As a simple case, the following:
+/// ```mlir
+/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
+/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
+/// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
+/// vector<4x4x4xf32>
+/// for %i0 = 0 to %M step 4 {
+/// for %i1 = 0 to %N step 4 {
+/// for %i2 = 0 to %O {
+/// for %i3 = 0 to %P step 4 {
+/// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
+/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
+/// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,
+/// index, index, index, index
+/// }}}}
+/// return
+/// }
+/// ```
+///
+/// is instantiated by unroll-and-jam (just unroll in this case) into:
+///
+/// ```mlir
+/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
+/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
+/// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4xf32>
+/// for %i0 = 0 to %arg0 step 4 {
+/// for %i1 = 0 to %arg1 step 4 {
+/// for %i2 = 0 to %arg2 {
+/// for %i3 = 0 to %arg3 step 4 {
+/// %1 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
+/// (%i0, %i1, %i2, %i3)
+/// vector_transfer_write %f1, %0, %1#0, %1#1, %1#2, %1#3
+/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} :
+/// vector<4x4xf32>, memref<?x?x?x?xf32>,
+/// index, index, index, index
+/// %2 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3 + 1)
+/// (%i0, %i1, %i2, %i3)
+/// vector_transfer_write {{.*}}, %0, %2#0, %2#1, %2#2, %2#3
+/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} :
+/// vector<4x4xf32>, memref<?x?x?x?xf32>,
+/// index, index, index, index
+/// %3 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3 + 2)
+/// (%i0, %i1, %i2, %i3)
+/// vector_transfer_write {{.*}}, %0, %3#0, %3#1, %3#2, %3#3
+/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} :
+/// vector<4x4xf32>, memref<?x?x?x?xf32>,
+/// index, index, index, index
+/// %4 = affine_apply (d0, d1, d2, d3) -> (d0, d1, d2, d3 + 3)
+/// (%i0, %i1, %i2, %i3)
+/// vector_transfer_write {{.*}}, %0, %4#0, %4#1, %4#2, %4#3
+/// {permutation_map: (d0, d1, d2, d3) -> (d1, d0)} :
+/// vector<4x4xf32>, memref<?x?x?x?xf32>,
+/// index, index, index, index
+/// }}}}
+/// return
+/// }
+/// ```
using llvm::dbgs;
using llvm::DenseSet;
@@ -333,6 +400,58 @@ instantiate(MLFuncBuilder *b, OperationStmt *opStmt, VectorType superVectorType,
materializeAttributes(opStmt, superVectorType, hwVectorType));
}
+/// Computes the permutation map to use for the VectorTransferOp that replaces
+/// `transfer` once its super-vector type is materialized as `hwVectorType`.
+///
+/// The intended result is the projection of `transfer`'s permutation map along
+/// the dimensions of the super-vector type that remain in `hwVectorType`. In
+/// particular, a dimension whose shape ratio equals its size is fully
+/// instantiated (i.e. unrolled) and is meant to be projected out of the result.
+///
+/// NOTE: the projection is not yet composed with the permutation map. For now
+/// the returned map keeps the number of inputs of the original permutation map
+/// and only its trailing results, as many as there are non-unrolled
+/// dimensions, so that the instantiated op has a map of the expected rank.
+///
+/// `VectorTransferOpTy` must be VectorTransferReadOp or VectorTransferWriteOp.
+/// Asserts that `shapeRatio` yields a result with one entry per dimension of
+/// the super-vector type.
+template <typename VectorTransferOpTy>
+static AffineMap projectedPermutationMap(VectorTransferOpTy *transfer,
+                                         VectorType hwVectorType) {
+  static_assert(
+      std::is_same<VectorTransferOpTy, VectorTransferReadOp>::value ||
+          std::is_same<VectorTransferOpTy, VectorTransferWriteOp>::value,
+      "Must be called on a VectorTransferOp");
+  auto superVectorType = transfer->getVectorType();
+  auto optionalRatio = shapeRatio(superVectorType, hwVectorType);
+  // Report a missing ratio separately from a ratio of the wrong rank so that a
+  // failing assertion names the actual problem.
+  assert(optionalRatio &&
+         "No shape ratio between super-vector type and HW vector type");
+  assert((optionalRatio->size() == superVectorType.getShape().size()) &&
+         "Shape and ratio not of the same size");
+  // Collect one dim expression per super-vector dimension that is not fully
+  // unrolled; `dim` tracks the position of the dimension being visited.
+  unsigned dim = 0;
+  SmallVector<AffineExpr, 4> keep;
+  MLIRContext *context = transfer->getOperation()->getContext();
+  functional::zipApply(
+      [&dim, &keep, context](int shape, int ratio) {
+        assert(shape >= ratio &&
+               "shape dim must be greater than or equal to ratio dim");
+        if (shape != ratio) {
+          // HW vector is not fully instantiated along this dim, keep it.
+          keep.push_back(getAffineDimExpr(dim, context));
+        }
+        ++dim;
+      },
+      superVectorType.getShape(), *optionalRatio);
+  auto projectionMap = AffineMap::get(optionalRatio->size(), 0, keep, {});
+  // `projectionMap` is only consumed by the debug output below; the cast keeps
+  // it from being reported as unused.
+  (void)projectionMap;
+  // No seemingly simple way to compose 2 AffineMap except going through SSA
+  // values... Punting for now and will resolve in the next CL.
+  //
+  // return projectionMap.compose(transfer->getPermutationMap());
+
+  // Still, we may need to drop a few dims to pass verification, so hack this in
+  // for now: drop the leading results of the original permutation map so that
+  // exactly `keep.size()` results remain.
+  auto map = transfer->getPermutationMap();
+  auto exprs = map.getResults();
+  assert(exprs.size() >= keep.size());
+  unsigned diff = exprs.size() - keep.size();
+  SmallVector<AffineExpr, 4> projectedExprs(exprs.begin() + diff, exprs.end());
+  auto res = AffineMap::get(map.getNumInputs(), 0, projectedExprs, {});
+  LLVM_DEBUG(projectionMap.print(dbgs() << "\nProjectionMap: "));
+  LLVM_DEBUG(map.print(dbgs() << "\nOriginal: "));
+  LLVM_DEBUG(res.print(dbgs() << "\nTemporarily hacked projection: "));
+  return res;
+}
+
/// Creates an instantiated version of `read` for the instance of
/// `hwVectorInstance` when lowering from a super-vector type to
/// `hwVectorType`. `hwVectorInstance` represents one particular instance of
@@ -349,8 +468,7 @@ instantiate(MLFuncBuilder *b, VectorTransferReadOp *read,
reindexAffineIndices(b, hwVectorType, hwVectorInstance, indices);
auto cloned = b->create<VectorTransferReadOp>(
read->getLoc(), hwVectorType, read->getMemRef(), affineIndices,
- makePermutationMap(read->getMemRefType(), hwVectorType),
- read->getPaddingValue());
+ projectedPermutationMap(read, hwVectorType), read->getPaddingValue());
return cast<OperationStmt>(cloned->getOperation());
}
@@ -371,7 +489,7 @@ instantiate(MLFuncBuilder *b, VectorTransferWriteOp *write,
auto cloned = b->create<VectorTransferWriteOp>(
write->getLoc(), substitute(write->getVector(), *substitutionsMap),
write->getMemRef(), affineIndices,
- makePermutationMap(write->getMemRefType(), hwVectorType));
+ projectedPermutationMap(write, hwVectorType));
return cast<OperationStmt>(cloned->getOperation());
}
OpenPOWER on IntegriCloud