summaryrefslogtreecommitdiffstats
path: root/mlir/lib/Conversion/VectorToLLVM
diff options
context:
space:
mode:
authorAart Bik <ajcbik@google.com>2019-12-06 11:01:54 -0800
committerA. Unique TensorFlower <gardener@tensorflow.org>2019-12-06 11:02:29 -0800
commitb36aaeafb1b026213432b5a8110467e16ed3f306 (patch)
tree00d62d7455c2fd1b9b47f19b5681492ab10e1a51 /mlir/lib/Conversion/VectorToLLVM
parent398f04aa49109fd5d1eff2c1946a2956dc6b29c6 (diff)
downloadbcm5719-llvm-b36aaeafb1b026213432b5a8110467e16ed3f306.tar.gz
bcm5719-llvm-b36aaeafb1b026213432b5a8110467e16ed3f306.zip
[VectorOps] Add lowering of vector.broadcast to LLVM IR
For example, a scalar broadcast

  %0 = vector.broadcast %x : f32 to vector<2xf32>
  return %0 : vector<2xf32>

expands scalar x into vector [x,x] by lowering to the following LLVM IR dialect, which implements the duplication over the leading dimension:

  %0 = llvm.mlir.undef : !llvm<"<2 x float>">
  %1 = llvm.mlir.constant(0 : index) : !llvm.i64
  %2 = llvm.insertelement %x, %0[%1 : !llvm.i64] : !llvm<"<2 x float>">
  %3 = llvm.shufflevector %2, %0 [0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>">
  return %3 : vector<2xf32>

In the trailing dimensions, the operand is simply "passed through", unless a more elaborate "stretch" is required. For example

  %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32>
  return %0 : vector<4xf32>

becomes

  %0 = llvm.mlir.undef : !llvm<"<4 x float>">
  %1 = llvm.mlir.constant(0 : index) : !llvm.i64
  %2 = llvm.extractelement %arg0[%1 : !llvm.i64] : !llvm<"<1 x float>">
  %3 = llvm.mlir.constant(0 : index) : !llvm.i64
  %4 = llvm.insertelement %2, %0[%3 : !llvm.i64] : !llvm<"<4 x float>">
  %5 = llvm.shufflevector %4, %0 [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>">
  llvm.return %5 : !llvm<"<4 x float>">

PiperOrigin-RevId: 284219926
Diffstat (limited to 'mlir/lib/Conversion/VectorToLLVM')
-rw-r--r--mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp187
1 files changed, 186 insertions, 1 deletions
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 7221998ce25..c40c7c5242a 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -49,6 +49,191 @@ static LLVM::LLVMType getPtrToElementType(T containerType,
.getPointerTo();
}
+class VectorBroadcastOpConversion : public LLVMOpLowering { // Lowers vector.broadcast to the LLVM dialect.
+public:
+ explicit VectorBroadcastOpConversion(MLIRContext *context,
+ LLVMTypeConverter &typeConverter)
+ : LLVMOpLowering(vector::BroadcastOp::getOperationName(), context,
+ typeConverter) {}
+
+ PatternMatchResult
+ matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
+ ConversionPatternRewriter &rewriter) const override {
+ auto broadcastOp = cast<vector::BroadcastOp>(op);
+ VectorType dstVectorType = broadcastOp.getVectorType();
+ if (lowering.convertType(dstVectorType) == nullptr)
+ return matchFailure(); // the result vector type cannot be lowered
+ // Rewrite only when the full vector type can be lowered (which
+ // implies all 'reduced' types used during expansion can be lowered too).
+ VectorType srcVectorType =
+ broadcastOp.getSourceType().dyn_cast<VectorType>(); // null for a scalar source
+ rewriter.replaceOp(
+ op, expandRanks(operands[0], // source value to be expanded
+ op->getLoc(), // location of original broadcast
+ srcVectorType, dstVectorType, rewriter));
+ return matchSuccess();
+ }
+
+private:
+ // Expands the given source value over all the ranks, as defined
+ // by the source and destination type (a null source type denotes
+ // expansion from a scalar value into a vector).
+ // Returns the expanded value, or 'value' itself when all dimensions match.
+ // TODO(ajcbik): consider replacing this one-pattern lowering
+ // with a two-pattern lowering using other vector
+ // ops once all insert/extract/shuffle operations
+ // are available with lowering implementation.
+ //
+ Value *expandRanks(Value *value, Location loc, VectorType srcVectorType,
+ VectorType dstVectorType,
+ ConversionPatternRewriter &rewriter) const {
+ assert((dstVectorType != nullptr) && "invalid result type in broadcast");
+ // Determine rank of source (0 for a scalar) and destination.
+ int64_t srcRank = srcVectorType ? srcVectorType.getRank() : 0;
+ int64_t dstRank = dstVectorType.getRank();
+ int64_t curDim = dstVectorType.getDimSize(0); // size of the leading destination dimension
+ if (srcRank < dstRank)
+ // The source has fewer ranks: duplicate it along the leading destination rank.
+ return duplicateOneRank(value, loc, srcVectorType, dstVectorType, dstRank,
+ curDim, rewriter);
+ // If all trailing dimensions are the same, the broadcast consists of
+ // simply passing through the source value and we are done. Otherwise,
+ // any non-matching dimension forces a stretch along this rank.
+ assert((srcVectorType != nullptr) && (srcRank > 0) &&
+ (srcRank == dstRank) && "invalid rank in broadcast");
+ for (int64_t r = 0; r < dstRank; r++) {
+ if (srcVectorType.getDimSize(r) != dstVectorType.getDimSize(r)) {
+ return stretchOneRank(value, loc, srcVectorType, dstVectorType, dstRank,
+ curDim, rewriter);
+ }
+ }
+ return value;
+ }
+
+ // Picks the best way to duplicate a single rank, where 'rank' and 'dim'
+ // are the rank and leading dimension size of the destination. For the 1-D
+ // case, a single insert-elt/shuffle is the most efficient expansion. For
+ // higher dimensions, however, we need dim x insert-values on a new broadcast
+ // with one less leading dimension, which is lowered "recursively" to LLVM IR.
+ // For example:
+ // v = broadcast s : f32 to vector<4x2xf32>
+ // becomes:
+ // x = broadcast s : f32 to vector<2xf32>
+ // v = [x,x,x,x]
+ // becomes:
+ // x = [s,s]
+ // v = [x,x,x,x]
+ Value *duplicateOneRank(Value *value, Location loc, VectorType srcVectorType,
+ VectorType dstVectorType, int64_t rank, int64_t dim,
+ ConversionPatternRewriter &rewriter) const {
+ Type llvmType = lowering.convertType(dstVectorType);
+ assert((llvmType != nullptr) && "unlowerable vector type");
+ if (rank == 1) {
+ Value *undef = rewriter.create<LLVM::UndefOp>(loc, llvmType);
+ Value *expand = insertOne(undef, value, loc, llvmType, rank, 0, rewriter);
+ SmallVector<int32_t, 4> zeroValues(dim, 0); // shuffle mask of 'dim' zeros
+ return rewriter.create<LLVM::ShuffleVectorOp>(
+ loc, expand, undef, rewriter.getI32ArrayAttr(zeroValues));
+ }
+ Value *expand = expandRanks(value, loc, srcVectorType,
+ reducedVectorType(dstVectorType), rewriter);
+ Value *result = rewriter.create<LLVM::UndefOp>(loc, llvmType);
+ for (int64_t d = 0; d < dim; ++d) { // insert the same expanded value at every position
+ result = insertOne(result, expand, loc, llvmType, rank, d, rewriter);
+ }
+ return result;
+ }
+
+ // Picks the best way to stretch a single rank ('rank' and 'dim' are the
+ // rank and leading dimension size of the destination). For the 1-D case, a
+ // single insert-elt/shuffle is the most efficient expansion when at a
+ // stretch. Otherwise, every dimension is expanded and inserted individually.
+ // For example:
+ // v = broadcast w : vector<4x1x2xf32> to vector<4x2x2xf32>
+ // becomes:
+ // a = broadcast w[0] : vector<1x2xf32> to vector<2x2xf32>
+ // b = broadcast w[1] : vector<1x2xf32> to vector<2x2xf32>
+ // c = broadcast w[2] : vector<1x2xf32> to vector<2x2xf32>
+ // d = broadcast w[3] : vector<1x2xf32> to vector<2x2xf32>
+ // v = [a,b,c,d]
+ // becomes:
+ // x = broadcast w[0][0] : vector<2xf32> to vector<2xf32>
+ // y = broadcast w[1][0] : vector<2xf32> to vector<2xf32>
+ // a = [x, x], b = [y, y]
+ // etc.
+ Value *stretchOneRank(Value *value, Location loc, VectorType srcVectorType,
+ VectorType dstVectorType, int64_t rank, int64_t dim,
+ ConversionPatternRewriter &rewriter) const {
+ Type llvmType = lowering.convertType(dstVectorType);
+ assert((llvmType != nullptr) && "unlowerable vector type");
+ Value *result = rewriter.create<LLVM::UndefOp>(loc, llvmType);
+ bool atStretch = dim != srcVectorType.getDimSize(0); // leading sizes differ: stretch here
+ if (rank == 1) {
+ Type redLlvmType = lowering.convertType(dstVectorType.getElementType());
+ if (atStretch) {
+ Value *one = extractOne(value, loc, redLlvmType, rank, 0, rewriter);
+ Value *expand =
+ insertOne(result, one, loc, llvmType, rank, 0, rewriter);
+ SmallVector<int32_t, 4> zeroValues(dim, 0); // shuffle mask of 'dim' zeros
+ return rewriter.create<LLVM::ShuffleVectorOp>(
+ loc, expand, result, rewriter.getI32ArrayAttr(zeroValues));
+ }
+ for (int64_t d = 0; d < dim; ++d) { // same leading size: copy element by element
+ Value *one = extractOne(value, loc, redLlvmType, rank, d, rewriter);
+ result = insertOne(result, one, loc, llvmType, rank, d, rewriter);
+ }
+ } else {
+ VectorType redSrcType = reducedVectorType(srcVectorType);
+ VectorType redDstType = reducedVectorType(dstVectorType);
+ Type redLlvmType = lowering.convertType(redSrcType);
+ for (int64_t d = 0; d < dim; ++d) {
+ int64_t pos = atStretch ? 0 : d; // when stretching, always read source position 0
+ Value *one = extractOne(value, loc, redLlvmType, rank, pos, rewriter);
+ Value *expand = expandRanks(one, loc, redSrcType, redDstType, rewriter);
+ result = insertOne(result, expand, loc, llvmType, rank, d, rewriter);
+ }
+ }
+ return result;
+ }
+
+ // Inserts val2 into val1 at 'pos': insert-element if rank is 1, else insert-value.
+ Value *insertOne(Value *val1, Value *val2, Location loc, Type llvmType,
+ int64_t rank, int64_t pos,
+ ConversionPatternRewriter &rewriter) const {
+ if (rank == 1) {
+ auto idxType = rewriter.getIndexType();
+ auto constant = rewriter.create<LLVM::ConstantOp>(
+ loc, lowering.convertType(idxType),
+ rewriter.getIntegerAttr(idxType, pos));
+ return rewriter.create<LLVM::InsertElementOp>(loc, llvmType, val1, val2,
+ constant);
+ }
+ return rewriter.create<LLVM::InsertValueOp>(loc, llvmType, val1, val2,
+ rewriter.getI64ArrayAttr(pos));
+ }
+
+ // Extracts position 'pos' of value: extract-element if rank is 1, else extract-value.
+ Value *extractOne(Value *value, Location loc, Type llvmType, int64_t rank,
+ int64_t pos, ConversionPatternRewriter &rewriter) const {
+ if (rank == 1) {
+ auto idxType = rewriter.getIndexType();
+ auto constant = rewriter.create<LLVM::ConstantOp>(
+ loc, lowering.convertType(idxType),
+ rewriter.getIntegerAttr(idxType, pos));
+ return rewriter.create<LLVM::ExtractElementOp>(loc, llvmType, value,
+ constant);
+ }
+ return rewriter.create<LLVM::ExtractValueOp>(loc, llvmType, value,
+ rewriter.getI64ArrayAttr(pos));
+ }
+
+ // Helper to reduce vector type by one rank by dropping the leading dimension.
+ static VectorType reducedVectorType(VectorType tp) {
+ assert((tp.getRank() > 1) && "unlowerable vector type");
+ return VectorType::get(tp.getShape().drop_front(), tp.getElementType());
+ }
+};
+
class VectorExtractElementOpConversion : public LLVMOpLowering {
public:
explicit VectorExtractElementOpConversion(MLIRContext *context,
@@ -246,7 +431,7 @@ public:
/// Populate the given list with patterns that convert from Vector to LLVM.
void mlir::populateVectorToLLVMConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
- patterns.insert<VectorExtractElementOpConversion,
+ patterns.insert<VectorBroadcastOpConversion, VectorExtractElementOpConversion,
VectorOuterProductOpConversion, VectorTypeCastOpConversion>(
converter.getDialect()->getContext(), converter);
}
OpenPOWER on IntegriCloud