Diffstat (limited to 'mlir/lib/Conversion')
15 files changed, 535 insertions, 562 deletions
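The change below is the Conversion-library half of MLIR's switch from the ValuePtr alias back to the value-typed Value class: every ValuePtr, ArrayRef<ValuePtr>, and SmallVector<ValuePtr, N> in these lowering patterns becomes Value, ArrayRef<Value>, and SmallVector<Value, N>, with no intended change in the generated IR. The first file, AffineToStandard.cpp, also carries comments describing how affine mod, floordiv, and ceildiv are expanded into standard ops when the divisor is a positive constant; the plain C++ functions below are an illustrative sketch of those same formulas, not code from the patch.

// Sketch of the expansions documented in AffineToStandard.cpp, assuming the
// divisor b is a positive constant (the only case the lowering handles).
long long affineMod(long long a, long long b) {
  long long remainder = a % b;                    // SignedRemIOp
  return remainder < 0 ? remainder + b : remainder;
}

long long affineFloorDiv(long long a, long long b) {
  bool negative = a < 0;
  long long dividend = negative ? -a - 1 : a;
  long long quotient = dividend / b;              // SignedDivIOp
  return negative ? -quotient - 1 : quotient;
}

long long affineCeilDiv(long long a, long long b) {
  bool nonPositive = a <= 0;
  long long dividend = nonPositive ? -a : a - 1;
  long long quotient = dividend / b;
  return nonPositive ? -quotient : quotient + 1;
}

The select-based IR built in the patch mirrors these ternaries, with SignedRemIOp and SignedDivIOp standing in for % and /.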
diff --git a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp index ce1e5c4a2af..e9a9ca82f51 100644 --- a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp +++ b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp @@ -33,16 +33,16 @@ namespace { // that correspond to it. Visitation functions return an Value of the // expression subtree they visited or `nullptr` on error. class AffineApplyExpander - : public AffineExprVisitor<AffineApplyExpander, ValuePtr> { + : public AffineExprVisitor<AffineApplyExpander, Value> { public: // This internal class expects arguments to be non-null, checks must be // performed at the call site. - AffineApplyExpander(OpBuilder &builder, ArrayRef<ValuePtr> dimValues, - ArrayRef<ValuePtr> symbolValues, Location loc) + AffineApplyExpander(OpBuilder &builder, ArrayRef<Value> dimValues, + ArrayRef<Value> symbolValues, Location loc) : builder(builder), dimValues(dimValues), symbolValues(symbolValues), loc(loc) {} - template <typename OpTy> ValuePtr buildBinaryExpr(AffineBinaryOpExpr expr) { + template <typename OpTy> Value buildBinaryExpr(AffineBinaryOpExpr expr) { auto lhs = visit(expr.getLHS()); auto rhs = visit(expr.getRHS()); if (!lhs || !rhs) @@ -51,11 +51,11 @@ public: return op.getResult(); } - ValuePtr visitAddExpr(AffineBinaryOpExpr expr) { + Value visitAddExpr(AffineBinaryOpExpr expr) { return buildBinaryExpr<AddIOp>(expr); } - ValuePtr visitMulExpr(AffineBinaryOpExpr expr) { + Value visitMulExpr(AffineBinaryOpExpr expr) { return buildBinaryExpr<MulIOp>(expr); } @@ -68,7 +68,7 @@ public: // let remainder = srem a, b; // negative = a < 0 in // select negative, remainder + b, remainder. - ValuePtr visitModExpr(AffineBinaryOpExpr expr) { + Value visitModExpr(AffineBinaryOpExpr expr) { auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>(); if (!rhsConst) { emitError( @@ -85,13 +85,13 @@ public: auto rhs = visit(expr.getRHS()); assert(lhs && rhs && "unexpected affine expr lowering failure"); - ValuePtr remainder = builder.create<SignedRemIOp>(loc, lhs, rhs); - ValuePtr zeroCst = builder.create<ConstantIndexOp>(loc, 0); - ValuePtr isRemainderNegative = + Value remainder = builder.create<SignedRemIOp>(loc, lhs, rhs); + Value zeroCst = builder.create<ConstantIndexOp>(loc, 0); + Value isRemainderNegative = builder.create<CmpIOp>(loc, CmpIPredicate::slt, remainder, zeroCst); - ValuePtr correctedRemainder = builder.create<AddIOp>(loc, remainder, rhs); - ValuePtr result = builder.create<SelectOp>(loc, isRemainderNegative, - correctedRemainder, remainder); + Value correctedRemainder = builder.create<AddIOp>(loc, remainder, rhs); + Value result = builder.create<SelectOp>(loc, isRemainderNegative, + correctedRemainder, remainder); return result; } @@ -105,7 +105,7 @@ public: // let absolute = negative ? -a - 1 : a in // let quotient = absolute / b in // negative ? 
-quotient - 1 : quotient - ValuePtr visitFloorDivExpr(AffineBinaryOpExpr expr) { + Value visitFloorDivExpr(AffineBinaryOpExpr expr) { auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>(); if (!rhsConst) { emitError( @@ -122,16 +122,16 @@ public: auto rhs = visit(expr.getRHS()); assert(lhs && rhs && "unexpected affine expr lowering failure"); - ValuePtr zeroCst = builder.create<ConstantIndexOp>(loc, 0); - ValuePtr noneCst = builder.create<ConstantIndexOp>(loc, -1); - ValuePtr negative = + Value zeroCst = builder.create<ConstantIndexOp>(loc, 0); + Value noneCst = builder.create<ConstantIndexOp>(loc, -1); + Value negative = builder.create<CmpIOp>(loc, CmpIPredicate::slt, lhs, zeroCst); - ValuePtr negatedDecremented = builder.create<SubIOp>(loc, noneCst, lhs); - ValuePtr dividend = + Value negatedDecremented = builder.create<SubIOp>(loc, noneCst, lhs); + Value dividend = builder.create<SelectOp>(loc, negative, negatedDecremented, lhs); - ValuePtr quotient = builder.create<SignedDivIOp>(loc, dividend, rhs); - ValuePtr correctedQuotient = builder.create<SubIOp>(loc, noneCst, quotient); - ValuePtr result = + Value quotient = builder.create<SignedDivIOp>(loc, dividend, rhs); + Value correctedQuotient = builder.create<SubIOp>(loc, noneCst, quotient); + Value result = builder.create<SelectOp>(loc, negative, correctedQuotient, quotient); return result; } @@ -146,7 +146,7 @@ public: // let absolute = negative ? -a : a - 1 in // let quotient = absolute / b in // negative ? -quotient : quotient + 1 - ValuePtr visitCeilDivExpr(AffineBinaryOpExpr expr) { + Value visitCeilDivExpr(AffineBinaryOpExpr expr) { auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>(); if (!rhsConst) { emitError(loc) << "semi-affine expressions (division by non-const) are " @@ -161,24 +161,23 @@ public: auto rhs = visit(expr.getRHS()); assert(lhs && rhs && "unexpected affine expr lowering failure"); - ValuePtr zeroCst = builder.create<ConstantIndexOp>(loc, 0); - ValuePtr oneCst = builder.create<ConstantIndexOp>(loc, 1); - ValuePtr nonPositive = + Value zeroCst = builder.create<ConstantIndexOp>(loc, 0); + Value oneCst = builder.create<ConstantIndexOp>(loc, 1); + Value nonPositive = builder.create<CmpIOp>(loc, CmpIPredicate::sle, lhs, zeroCst); - ValuePtr negated = builder.create<SubIOp>(loc, zeroCst, lhs); - ValuePtr decremented = builder.create<SubIOp>(loc, lhs, oneCst); - ValuePtr dividend = + Value negated = builder.create<SubIOp>(loc, zeroCst, lhs); + Value decremented = builder.create<SubIOp>(loc, lhs, oneCst); + Value dividend = builder.create<SelectOp>(loc, nonPositive, negated, decremented); - ValuePtr quotient = builder.create<SignedDivIOp>(loc, dividend, rhs); - ValuePtr negatedQuotient = builder.create<SubIOp>(loc, zeroCst, quotient); - ValuePtr incrementedQuotient = - builder.create<AddIOp>(loc, quotient, oneCst); - ValuePtr result = builder.create<SelectOp>( - loc, nonPositive, negatedQuotient, incrementedQuotient); + Value quotient = builder.create<SignedDivIOp>(loc, dividend, rhs); + Value negatedQuotient = builder.create<SubIOp>(loc, zeroCst, quotient); + Value incrementedQuotient = builder.create<AddIOp>(loc, quotient, oneCst); + Value result = builder.create<SelectOp>(loc, nonPositive, negatedQuotient, + incrementedQuotient); return result; } - ValuePtr visitConstantExpr(AffineConstantExpr expr) { + Value visitConstantExpr(AffineConstantExpr expr) { auto valueAttr = builder.getIntegerAttr(builder.getIndexType(), expr.getValue()); auto op = @@ -186,13 +185,13 @@ public: return op.getResult(); } - 
ValuePtr visitDimExpr(AffineDimExpr expr) { + Value visitDimExpr(AffineDimExpr expr) { assert(expr.getPosition() < dimValues.size() && "affine dim position out of range"); return dimValues[expr.getPosition()]; } - ValuePtr visitSymbolExpr(AffineSymbolExpr expr) { + Value visitSymbolExpr(AffineSymbolExpr expr) { assert(expr.getPosition() < symbolValues.size() && "symbol dim position out of range"); return symbolValues[expr.getPosition()]; @@ -200,8 +199,8 @@ public: private: OpBuilder &builder; - ArrayRef<ValuePtr> dimValues; - ArrayRef<ValuePtr> symbolValues; + ArrayRef<Value> dimValues; + ArrayRef<Value> symbolValues; Location loc; }; @@ -209,18 +208,17 @@ private: // Create a sequence of operations that implement the `expr` applied to the // given dimension and symbol values. -mlir::ValuePtr mlir::expandAffineExpr(OpBuilder &builder, Location loc, - AffineExpr expr, - ArrayRef<ValuePtr> dimValues, - ArrayRef<ValuePtr> symbolValues) { +mlir::Value mlir::expandAffineExpr(OpBuilder &builder, Location loc, + AffineExpr expr, ArrayRef<Value> dimValues, + ArrayRef<Value> symbolValues) { return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr); } // Create a sequence of operations that implement the `affineMap` applied to // the given `operands` (as it it were an AffineApplyOp). -Optional<SmallVector<ValuePtr, 8>> static expandAffineMap( +Optional<SmallVector<Value, 8>> static expandAffineMap( OpBuilder &builder, Location loc, AffineMap affineMap, - ArrayRef<ValuePtr> operands) { + ArrayRef<Value> operands) { auto numDims = affineMap.getNumDims(); auto expanded = functional::map( [numDims, &builder, loc, operands](AffineExpr expr) { @@ -229,7 +227,7 @@ Optional<SmallVector<ValuePtr, 8>> static expandAffineMap( operands.drop_front(numDims)); }, affineMap.getResults()); - if (llvm::all_of(expanded, [](ValuePtr v) { return v; })) + if (llvm::all_of(expanded, [](Value v) { return v; })) return expanded; return None; } @@ -245,13 +243,13 @@ Optional<SmallVector<ValuePtr, 8>> static expandAffineMap( // Multiple values are scanned in a linear sequence. This creates a data // dependences that wouldn't exist in a tree reduction, but is easier to // recognize as a reduction by the subsequent passes. -static ValuePtr buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate, - ArrayRef<ValuePtr> values, - OpBuilder &builder) { +static Value buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate, + ArrayRef<Value> values, + OpBuilder &builder) { assert(!llvm::empty(values) && "empty min/max chain"); auto valueIt = values.begin(); - ValuePtr value = *valueIt++; + Value value = *valueIt++; for (; valueIt != values.end(); ++valueIt) { auto cmpOp = builder.create<CmpIOp>(loc, predicate, value, *valueIt); value = builder.create<SelectOp>(loc, cmpOp.getResult(), value, *valueIt); @@ -263,8 +261,8 @@ static ValuePtr buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate, // Emit instructions that correspond to the affine map in the lower bound // applied to the respective operands, and compute the maximum value across // the results. 
-ValuePtr mlir::lowerAffineLowerBound(AffineForOp op, OpBuilder &builder) { - SmallVector<ValuePtr, 8> boundOperands(op.getLowerBoundOperands()); +Value mlir::lowerAffineLowerBound(AffineForOp op, OpBuilder &builder) { + SmallVector<Value, 8> boundOperands(op.getLowerBoundOperands()); auto lbValues = expandAffineMap(builder, op.getLoc(), op.getLowerBoundMap(), boundOperands); if (!lbValues) @@ -276,8 +274,8 @@ ValuePtr mlir::lowerAffineLowerBound(AffineForOp op, OpBuilder &builder) { // Emit instructions that correspond to the affine map in the upper bound // applied to the respective operands, and compute the minimum value across // the results. -ValuePtr mlir::lowerAffineUpperBound(AffineForOp op, OpBuilder &builder) { - SmallVector<ValuePtr, 8> boundOperands(op.getUpperBoundOperands()); +Value mlir::lowerAffineUpperBound(AffineForOp op, OpBuilder &builder) { + SmallVector<Value, 8> boundOperands(op.getUpperBoundOperands()); auto ubValues = expandAffineMap(builder, op.getLoc(), op.getUpperBoundMap(), boundOperands); if (!ubValues) @@ -306,9 +304,9 @@ public: PatternMatchResult matchAndRewrite(AffineForOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); - ValuePtr lowerBound = lowerAffineLowerBound(op, rewriter); - ValuePtr upperBound = lowerAffineUpperBound(op, rewriter); - ValuePtr step = rewriter.create<ConstantIndexOp>(loc, op.getStep()); + Value lowerBound = lowerAffineLowerBound(op, rewriter); + Value upperBound = lowerAffineUpperBound(op, rewriter); + Value step = rewriter.create<ConstantIndexOp>(loc, op.getStep()); auto f = rewriter.create<loop::ForOp>(loc, lowerBound, upperBound, step); f.region().getBlocks().clear(); rewriter.inlineRegionBefore(op.region(), f.region(), f.region().end()); @@ -327,25 +325,25 @@ public: // Now we just have to handle the condition logic. auto integerSet = op.getIntegerSet(); - ValuePtr zeroConstant = rewriter.create<ConstantIndexOp>(loc, 0); - SmallVector<ValuePtr, 8> operands(op.getOperands()); + Value zeroConstant = rewriter.create<ConstantIndexOp>(loc, 0); + SmallVector<Value, 8> operands(op.getOperands()); auto operandsRef = llvm::makeArrayRef(operands); // Calculate cond as a conjunction without short-circuiting. - ValuePtr cond = nullptr; + Value cond = nullptr; for (unsigned i = 0, e = integerSet.getNumConstraints(); i < e; ++i) { AffineExpr constraintExpr = integerSet.getConstraint(i); bool isEquality = integerSet.isEq(i); // Build and apply an affine expression auto numDims = integerSet.getNumDims(); - ValuePtr affResult = expandAffineExpr(rewriter, loc, constraintExpr, - operandsRef.take_front(numDims), - operandsRef.drop_front(numDims)); + Value affResult = expandAffineExpr(rewriter, loc, constraintExpr, + operandsRef.take_front(numDims), + operandsRef.drop_front(numDims)); if (!affResult) return matchFailure(); auto pred = isEquality ? CmpIPredicate::eq : CmpIPredicate::sge; - ValuePtr cmpVal = + Value cmpVal = rewriter.create<CmpIOp>(loc, pred, affResult, zeroConstant); cond = cond ? rewriter.create<AndOp>(loc, cond, cmpVal).getResult() : cmpVal; @@ -396,7 +394,7 @@ public: PatternMatchResult matchAndRewrite(AffineLoadOp op, PatternRewriter &rewriter) const override { // Expand affine map from 'affineLoadOp'. 
- SmallVector<ValuePtr, 8> indices(op.getMapOperands()); + SmallVector<Value, 8> indices(op.getMapOperands()); auto resultOperands = expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices); if (!resultOperands) @@ -418,7 +416,7 @@ public: PatternMatchResult matchAndRewrite(AffinePrefetchOp op, PatternRewriter &rewriter) const override { // Expand affine map from 'affinePrefetchOp'. - SmallVector<ValuePtr, 8> indices(op.getMapOperands()); + SmallVector<Value, 8> indices(op.getMapOperands()); auto resultOperands = expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices); if (!resultOperands) @@ -442,7 +440,7 @@ public: PatternMatchResult matchAndRewrite(AffineStoreOp op, PatternRewriter &rewriter) const override { // Expand affine map from 'affineStoreOp'. - SmallVector<ValuePtr, 8> indices(op.getMapOperands()); + SmallVector<Value, 8> indices(op.getMapOperands()); auto maybeExpandedMap = expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices); if (!maybeExpandedMap) @@ -464,7 +462,7 @@ public: PatternMatchResult matchAndRewrite(AffineDmaStartOp op, PatternRewriter &rewriter) const override { - SmallVector<ValuePtr, 8> operands(op.getOperands()); + SmallVector<Value, 8> operands(op.getOperands()); auto operandsRef = llvm::makeArrayRef(operands); // Expand affine map for DMA source memref. @@ -505,7 +503,7 @@ public: PatternMatchResult matchAndRewrite(AffineDmaWaitOp op, PatternRewriter &rewriter) const override { // Expand affine map for DMA tag memref. - SmallVector<ValuePtr, 8> indices(op.getTagIndices()); + SmallVector<Value, 8> indices(op.getTagIndices()); auto maybeExpandedTagMap = expandAffineMap(rewriter, op.getLoc(), op.getTagMap(), indices); if (!maybeExpandedTagMap) diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h index 2ca9717ad86..63bc15173be 100644 --- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h +++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h @@ -48,11 +48,11 @@ public: // Convert the kernel arguments to an LLVM type, preserve the rest. 
PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto dialect = lowering.getDialect(); - ValuePtr newOp; + Value newOp; switch (dimensionToIndex(cast<Op>(op))) { case X: newOp = rewriter.create<XOp>(loc, LLVM::LLVMType::getInt32Ty(dialect)); diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h index 97881d359f6..b75c1bf2d7b 100644 --- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h +++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h @@ -35,7 +35,7 @@ public: f32Func(f32Func), f64Func(f64Func) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { using LLVM::LLVMFuncOp; using LLVM::LLVMType; @@ -60,10 +60,10 @@ public: private: LLVM::LLVMType getFunctionType(LLVM::LLVMType resultType, - ArrayRef<ValuePtr> operands) const { + ArrayRef<Value> operands) const { using LLVM::LLVMType; SmallVector<LLVMType, 1> operandTypes; - for (ValuePtr operand : operands) { + for (Value operand : operands) { operandTypes.push_back(operand->getType().cast<LLVMType>()); } return LLVMType::getFunctionTy(resultType, operandTypes, diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp index 3383cf13d36..19dabcdafee 100644 --- a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp +++ b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp @@ -105,7 +105,7 @@ private: } // Allocate a void pointer on the stack. 
- ValuePtr allocatePointer(OpBuilder &builder, Location loc) { + Value allocatePointer(OpBuilder &builder, Location loc) { auto one = builder.create<LLVM::ConstantOp>(loc, getInt32Type(), builder.getI32IntegerAttr(1)); return builder.create<LLVM::AllocaOp>(loc, getPointerPointerType(), one, @@ -113,9 +113,9 @@ private: } void declareCudaFunctions(Location loc); - ValuePtr setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder); - ValuePtr generateKernelNameConstant(StringRef name, Location loc, - OpBuilder &builder); + Value setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder); + Value generateKernelNameConstant(StringRef name, Location loc, + OpBuilder &builder); void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp); public: @@ -239,9 +239,8 @@ void GpuLaunchFuncToCudaCallsPass::declareCudaFunctions(Location loc) { // for (i : [0, NumKernelOperands)) // %array[i] = cast<void*>(KernelOperand[i]) // return %array -ValuePtr -GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, - OpBuilder &builder) { +Value GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, + OpBuilder &builder) { auto numKernelOperands = launchOp.getNumKernelOperands(); Location loc = launchOp.getLoc(); auto one = builder.create<LLVM::ConstantOp>(loc, getInt32Type(), @@ -255,7 +254,7 @@ GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, for (unsigned idx = 0; idx < numKernelOperands; ++idx) { auto operand = launchOp.getKernelOperand(idx); auto llvmType = operand->getType().cast<LLVM::LLVMType>(); - ValuePtr memLocation = builder.create<LLVM::AllocaOp>( + Value memLocation = builder.create<LLVM::AllocaOp>( loc, llvmType.getPointerTo(), one, /*alignment=*/1); builder.create<LLVM::StoreOp>(loc, operand, memLocation); auto casted = @@ -271,12 +270,12 @@ GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, getModule().lookupSymbol<LLVM::LLVMFuncOp>(kMcuMemHostRegister); auto nullPtr = builder.create<LLVM::NullOp>(loc, llvmType.getPointerTo()); auto gep = builder.create<LLVM::GEPOp>(loc, llvmType.getPointerTo(), - ArrayRef<ValuePtr>{nullPtr, one}); + ArrayRef<Value>{nullPtr, one}); auto size = builder.create<LLVM::PtrToIntOp>(loc, getInt64Type(), gep); builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{}, builder.getSymbolRefAttr(registerFunc), - ArrayRef<ValuePtr>{casted, size}); - ValuePtr memLocation = builder.create<LLVM::AllocaOp>( + ArrayRef<Value>{casted, size}); + Value memLocation = builder.create<LLVM::AllocaOp>( loc, getPointerPointerType(), one, /*alignment=*/1); builder.create<LLVM::StoreOp>(loc, casted, memLocation); casted = @@ -286,7 +285,7 @@ GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, auto index = builder.create<LLVM::ConstantOp>( loc, getInt32Type(), builder.getI32IntegerAttr(idx)); auto gep = builder.create<LLVM::GEPOp>(loc, getPointerPointerType(), array, - ArrayRef<ValuePtr>{index}); + ArrayRef<Value>{index}); builder.create<LLVM::StoreOp>(loc, casted, gep); } return array; @@ -302,7 +301,7 @@ GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, // %1 = llvm.constant (0 : index) // %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*"> // } -ValuePtr GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant( +Value GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant( StringRef name, Location loc, OpBuilder &builder) { // Make sure the trailing zero is included in the constant. 
std::vector<char> kernelName(name.begin(), name.end()); @@ -358,7 +357,7 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( assert(kernelModule.getName() && "expected a named module"); SmallString<128> nameBuffer(*kernelModule.getName()); nameBuffer.append(kCubinStorageSuffix); - ValuePtr data = LLVM::createGlobalString( + Value data = LLVM::createGlobalString( loc, builder, nameBuffer.str(), cubinAttr.getValue(), LLVM::Linkage::Internal, getLLVMDialect()); @@ -369,7 +368,7 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleLoadName); builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getCUResultType()}, builder.getSymbolRefAttr(cuModuleLoad), - ArrayRef<ValuePtr>{cuModule, data}); + ArrayRef<Value>{cuModule, data}); // Get the function from the module. The name corresponds to the name of // the kernel function. auto cuOwningModuleRef = @@ -381,13 +380,13 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( builder.create<LLVM::CallOp>( loc, ArrayRef<Type>{getCUResultType()}, builder.getSymbolRefAttr(cuModuleGetFunction), - ArrayRef<ValuePtr>{cuFunction, cuOwningModuleRef, kernelName}); + ArrayRef<Value>{cuFunction, cuOwningModuleRef, kernelName}); // Grab the global stream needed for execution. auto cuGetStreamHelper = getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuGetStreamHelperName); auto cuStream = builder.create<LLVM::CallOp>( loc, ArrayRef<Type>{getPointerType()}, - builder.getSymbolRefAttr(cuGetStreamHelper), ArrayRef<ValuePtr>{}); + builder.getSymbolRefAttr(cuGetStreamHelper), ArrayRef<Value>{}); // Invoke the function with required arguments. auto cuLaunchKernel = getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuLaunchKernelName); @@ -399,19 +398,19 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( builder.create<LLVM::CallOp>( loc, ArrayRef<Type>{getCUResultType()}, builder.getSymbolRefAttr(cuLaunchKernel), - ArrayRef<ValuePtr>{cuFunctionRef, launchOp.getOperand(0), - launchOp.getOperand(1), launchOp.getOperand(2), - launchOp.getOperand(3), launchOp.getOperand(4), - launchOp.getOperand(5), zero, /* sharedMemBytes */ - cuStream.getResult(0), /* stream */ - paramsArray, /* kernel params */ - nullpointer /* extra */}); + ArrayRef<Value>{cuFunctionRef, launchOp.getOperand(0), + launchOp.getOperand(1), launchOp.getOperand(2), + launchOp.getOperand(3), launchOp.getOperand(4), + launchOp.getOperand(5), zero, /* sharedMemBytes */ + cuStream.getResult(0), /* stream */ + paramsArray, /* kernel params */ + nullpointer /* extra */}); // Sync on the stream to make it synchronous. auto cuStreamSync = getModule().lookupSymbol<LLVM::LLVMFuncOp>(cuStreamSynchronizeName); builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getCUResultType()}, builder.getSymbolRefAttr(cuStreamSync), - ArrayRef<ValuePtr>(cuStream.getResult(0))); + ArrayRef<Value>(cuStream.getResult(0))); launchOp.erase(); } diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index e15ad823a2b..08c18c1ec83 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -51,8 +51,8 @@ public: /// Converts all_reduce op to LLVM/NVVM ops. 
struct GPUAllReduceOpLowering : public LLVMOpLowering { - using AccumulatorFactory = std::function<ValuePtr( - Location, ValuePtr, ValuePtr, ConversionPatternRewriter &)>; + using AccumulatorFactory = + std::function<Value(Location, Value, Value, ConversionPatternRewriter &)>; explicit GPUAllReduceOpLowering(LLVMTypeConverter &lowering_) : LLVMOpLowering(gpu::AllReduceOp::getOperationName(), @@ -60,10 +60,10 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering { int32Type(LLVM::LLVMType::getInt32Ty(lowering_.getDialect())) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { Location loc = op->getLoc(); - ValuePtr operand = operands.front(); + Value operand = operands.front(); // TODO(csigg): Generalize to other types of accumulation. assert(op->getOperand(0)->getType().isIntOrFloat()); @@ -72,7 +72,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering { AccumulatorFactory factory = getFactory(cast<gpu::AllReduceOp>(op), operand); assert(factory && "failed to create accumulator factory"); - ValuePtr result = createBlockReduce(loc, operand, factory, rewriter); + Value result = createBlockReduce(loc, operand, factory, rewriter); rewriter.replaceOp(op, {result}); return matchSuccess(); @@ -82,7 +82,7 @@ private: /// Returns an accumulator factory using either the op attribute or the body /// region. AccumulatorFactory getFactory(gpu::AllReduceOp allReduce, - ValuePtr operand) const { + Value operand) const { if (!allReduce.body().empty()) { return getFactory(allReduce.body()); } @@ -97,7 +97,7 @@ private: /// block is expected to have 2 arguments. The gpu.yield return the /// accumulated value of the same type. AccumulatorFactory getFactory(Region &body) const { - return AccumulatorFactory([&](Location loc, ValuePtr lhs, ValuePtr rhs, + return AccumulatorFactory([&](Location loc, Value lhs, Value rhs, ConversionPatternRewriter &rewriter) { Block *block = rewriter.getInsertionBlock(); Block *split = rewriter.splitBlock(block, rewriter.getInsertionPoint()); @@ -111,7 +111,7 @@ private: // Add branch before inserted body, into body. block = block->getNextNode(); - rewriter.create<LLVM::BrOp>(loc, ArrayRef<ValuePtr>{}, + rewriter.create<LLVM::BrOp>(loc, ArrayRef<Value>{}, llvm::makeArrayRef(block), ValueRange()); // Replace all gpu.yield ops with branch out of body. @@ -121,7 +121,7 @@ private: continue; rewriter.setInsertionPointToEnd(block); rewriter.replaceOpWithNewOp<LLVM::BrOp>( - terminator, ArrayRef<ValuePtr>{}, llvm::makeArrayRef(split), + terminator, ArrayRef<Value>{}, llvm::makeArrayRef(split), ValueRange(terminator->getOperand(0))); } @@ -152,7 +152,7 @@ private: /// Returns an accumulator factory that creates an op of type T. 
template <typename T> AccumulatorFactory getFactory() const { - return [](Location loc, ValuePtr lhs, ValuePtr rhs, + return [](Location loc, Value lhs, Value rhs, ConversionPatternRewriter &rewriter) { return rewriter.create<T>(loc, lhs->getType(), lhs, rhs); }; @@ -194,60 +194,60 @@ private: /// %result = llvm.load %result_ptr /// return %result /// - ValuePtr createBlockReduce(Location loc, ValuePtr operand, - AccumulatorFactory &accumFactory, - ConversionPatternRewriter &rewriter) const { + Value createBlockReduce(Location loc, Value operand, + AccumulatorFactory &accumFactory, + ConversionPatternRewriter &rewriter) const { auto type = operand->getType().cast<LLVM::LLVMType>(); // Create shared memory array to store the warp reduction. auto module = operand->getDefiningOp()->getParentOfType<ModuleOp>(); assert(module && "op must belong to a module"); - ValuePtr sharedMemPtr = + Value sharedMemPtr = createSharedMemoryArray(loc, module, type, kWarpSize, rewriter); - ValuePtr zero = rewriter.create<LLVM::ConstantOp>( + Value zero = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(0u)); - ValuePtr laneId = rewriter.create<NVVM::LaneIdOp>(loc, int32Type); - ValuePtr isFirstLane = rewriter.create<LLVM::ICmpOp>( + Value laneId = rewriter.create<NVVM::LaneIdOp>(loc, int32Type); + Value isFirstLane = rewriter.create<LLVM::ICmpOp>( loc, LLVM::ICmpPredicate::eq, laneId, zero); - ValuePtr threadIdx = getLinearThreadIndex(loc, rewriter); - ValuePtr blockSize = getBlockSize(loc, rewriter); - ValuePtr activeWidth = getActiveWidth(loc, threadIdx, blockSize, rewriter); + Value threadIdx = getLinearThreadIndex(loc, rewriter); + Value blockSize = getBlockSize(loc, rewriter); + Value activeWidth = getActiveWidth(loc, threadIdx, blockSize, rewriter); // Reduce elements within each warp to produce the intermediate results. - ValuePtr warpReduce = createWarpReduce(loc, activeWidth, laneId, operand, - accumFactory, rewriter); + Value warpReduce = createWarpReduce(loc, activeWidth, laneId, operand, + accumFactory, rewriter); // Write the intermediate results to shared memory, using the first lane of // each warp. createPredicatedBlock(loc, rewriter, isFirstLane, [&] { - ValuePtr warpId = getDivideByWarpSize(threadIdx, rewriter); - ValuePtr storeDst = rewriter.create<LLVM::GEPOp>( - loc, type, sharedMemPtr, ArrayRef<ValuePtr>({zero, warpId})); + Value warpId = getDivideByWarpSize(threadIdx, rewriter); + Value storeDst = rewriter.create<LLVM::GEPOp>( + loc, type, sharedMemPtr, ArrayRef<Value>({zero, warpId})); rewriter.create<LLVM::StoreOp>(loc, warpReduce, storeDst); }); rewriter.create<NVVM::Barrier0Op>(loc); - ValuePtr numWarps = getNumWarps(loc, blockSize, rewriter); - ValuePtr isValidWarp = rewriter.create<LLVM::ICmpOp>( + Value numWarps = getNumWarps(loc, blockSize, rewriter); + Value isValidWarp = rewriter.create<LLVM::ICmpOp>( loc, LLVM::ICmpPredicate::slt, threadIdx, numWarps); - ValuePtr resultPtr = rewriter.create<LLVM::GEPOp>( - loc, type, sharedMemPtr, ArrayRef<ValuePtr>({zero, zero})); + Value resultPtr = rewriter.create<LLVM::GEPOp>( + loc, type, sharedMemPtr, ArrayRef<Value>({zero, zero})); // Use the first numWarps threads to reduce the intermediate results from // shared memory. The final result is written to shared memory again. 
createPredicatedBlock(loc, rewriter, isValidWarp, [&] { - ValuePtr loadSrc = rewriter.create<LLVM::GEPOp>( - loc, type, sharedMemPtr, ArrayRef<ValuePtr>({zero, threadIdx})); - ValuePtr value = rewriter.create<LLVM::LoadOp>(loc, type, loadSrc); - ValuePtr result = createWarpReduce(loc, numWarps, laneId, value, - accumFactory, rewriter); + Value loadSrc = rewriter.create<LLVM::GEPOp>( + loc, type, sharedMemPtr, ArrayRef<Value>({zero, threadIdx})); + Value value = rewriter.create<LLVM::LoadOp>(loc, type, loadSrc); + Value result = createWarpReduce(loc, numWarps, laneId, value, + accumFactory, rewriter); rewriter.create<LLVM::StoreOp>(loc, result, resultPtr); }); rewriter.create<NVVM::Barrier0Op>(loc); // Load and return result from shared memory. - ValuePtr result = rewriter.create<LLVM::LoadOp>(loc, type, resultPtr); + Value result = rewriter.create<LLVM::LoadOp>(loc, type, resultPtr); return result; } @@ -265,7 +265,7 @@ private: /// template <typename ThenOpsFactory, typename ElseOpsFactory> void createIf(Location loc, ConversionPatternRewriter &rewriter, - ValuePtr condition, ThenOpsFactory &&thenOpsFactory, + Value condition, ThenOpsFactory &&thenOpsFactory, ElseOpsFactory &&elseOpsFactory) const { Block *currentBlock = rewriter.getInsertionBlock(); auto currentPoint = rewriter.getInsertionPoint(); @@ -279,7 +279,7 @@ private: ArrayRef<Block *>{thenBlock, elseBlock}); auto addBranch = [&](ValueRange operands) { - rewriter.create<LLVM::BrOp>(loc, ArrayRef<ValuePtr>{}, + rewriter.create<LLVM::BrOp>(loc, ArrayRef<Value>{}, llvm::makeArrayRef(continueBlock), llvm::makeArrayRef(operands)); }; @@ -301,25 +301,25 @@ private: /// Shortcut for createIf with empty else block and no block operands. template <typename Factory> void createPredicatedBlock(Location loc, ConversionPatternRewriter &rewriter, - ValuePtr condition, + Value condition, Factory &&predicatedOpsFactory) const { createIf( loc, rewriter, condition, [&] { predicatedOpsFactory(); - return ArrayRef<ValuePtr>(); + return ArrayRef<Value>(); }, - [&] { return ArrayRef<ValuePtr>(); }); + [&] { return ArrayRef<Value>(); }); } /// Creates a reduction across the first activeWidth lanes of a warp. /// The first lane returns the result, all others return values are undefined. - ValuePtr createWarpReduce(Location loc, ValuePtr activeWidth, ValuePtr laneId, - ValuePtr operand, AccumulatorFactory accumFactory, - ConversionPatternRewriter &rewriter) const { - ValuePtr warpSize = rewriter.create<LLVM::ConstantOp>( + Value createWarpReduce(Location loc, Value activeWidth, Value laneId, + Value operand, AccumulatorFactory accumFactory, + ConversionPatternRewriter &rewriter) const { + Value warpSize = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(kWarpSize)); - ValuePtr isPartialWarp = rewriter.create<LLVM::ICmpOp>( + Value isPartialWarp = rewriter.create<LLVM::ICmpOp>( loc, LLVM::ICmpPredicate::slt, activeWidth, warpSize); auto type = operand->getType().cast<LLVM::LLVMType>(); @@ -327,16 +327,16 @@ private: loc, rewriter, isPartialWarp, // Generate reduction over a (potentially) partial warp. [&] { - ValuePtr value = operand; - ValuePtr one = rewriter.create<LLVM::ConstantOp>( + Value value = operand; + Value one = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(1)); // Bit mask of active lanes: `(1 << activeWidth) - 1`. 
- ValuePtr activeMask = rewriter.create<LLVM::SubOp>( + Value activeMask = rewriter.create<LLVM::SubOp>( loc, int32Type, rewriter.create<LLVM::ShlOp>(loc, int32Type, one, activeWidth), one); // Clamp lane: `activeWidth - 1` - ValuePtr maskAndClamp = + Value maskAndClamp = rewriter.create<LLVM::SubOp>(loc, int32Type, activeWidth, one); auto dialect = lowering.getDialect(); auto predTy = LLVM::LLVMType::getInt1Ty(dialect); @@ -347,53 +347,53 @@ private: // lane is within the active range. All lanes contain the final // result, but only the first lane's result is used. for (int i = 1; i < kWarpSize; i <<= 1) { - ValuePtr offset = rewriter.create<LLVM::ConstantOp>( + Value offset = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(i)); - ValuePtr shfl = rewriter.create<NVVM::ShflBflyOp>( + Value shfl = rewriter.create<NVVM::ShflBflyOp>( loc, shflTy, activeMask, value, offset, maskAndClamp, returnValueAndIsValidAttr); - ValuePtr isActiveSrcLane = rewriter.create<LLVM::ExtractValueOp>( + Value isActiveSrcLane = rewriter.create<LLVM::ExtractValueOp>( loc, predTy, shfl, rewriter.getIndexArrayAttr(1)); // Skip the accumulation if the shuffle op read from a lane outside // of the active range. createIf( loc, rewriter, isActiveSrcLane, [&] { - ValuePtr shflValue = rewriter.create<LLVM::ExtractValueOp>( + Value shflValue = rewriter.create<LLVM::ExtractValueOp>( loc, type, shfl, rewriter.getIndexArrayAttr(0)); - return SmallVector<ValuePtr, 1>{ + return SmallVector<Value, 1>{ accumFactory(loc, value, shflValue, rewriter)}; }, [&] { return llvm::makeArrayRef(value); }); value = rewriter.getInsertionBlock()->getArgument(0); } - return SmallVector<ValuePtr, 1>{value}; + return SmallVector<Value, 1>{value}; }, // Generate a reduction over the entire warp. This is a specialization // of the above reduction with unconditional accumulation. [&] { - ValuePtr value = operand; - ValuePtr activeMask = rewriter.create<LLVM::ConstantOp>( + Value value = operand; + Value activeMask = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(~0u)); - ValuePtr maskAndClamp = rewriter.create<LLVM::ConstantOp>( + Value maskAndClamp = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(kWarpSize - 1)); for (int i = 1; i < kWarpSize; i <<= 1) { - ValuePtr offset = rewriter.create<LLVM::ConstantOp>( + Value offset = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(i)); - ValuePtr shflValue = rewriter.create<NVVM::ShflBflyOp>( + Value shflValue = rewriter.create<NVVM::ShflBflyOp>( loc, type, activeMask, value, offset, maskAndClamp, /*return_value_and_is_valid=*/UnitAttr()); value = accumFactory(loc, value, shflValue, rewriter); } - return SmallVector<ValuePtr, 1>{value}; + return SmallVector<Value, 1>{value}; }); return rewriter.getInsertionBlock()->getArgument(0); } /// Creates a global array stored in shared memory. - ValuePtr createSharedMemoryArray(Location loc, ModuleOp module, - LLVM::LLVMType elementType, int numElements, - ConversionPatternRewriter &rewriter) const { + Value createSharedMemoryArray(Location loc, ModuleOp module, + LLVM::LLVMType elementType, int numElements, + ConversionPatternRewriter &rewriter) const { OpBuilder builder(module.getBodyRegion()); auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements); @@ -407,32 +407,31 @@ private: } /// Returns the index of the thread within the block. 
- ValuePtr getLinearThreadIndex(Location loc, - ConversionPatternRewriter &rewriter) const { - ValuePtr dimX = rewriter.create<NVVM::BlockDimXOp>(loc, int32Type); - ValuePtr dimY = rewriter.create<NVVM::BlockDimYOp>(loc, int32Type); - ValuePtr idX = rewriter.create<NVVM::ThreadIdXOp>(loc, int32Type); - ValuePtr idY = rewriter.create<NVVM::ThreadIdYOp>(loc, int32Type); - ValuePtr idZ = rewriter.create<NVVM::ThreadIdZOp>(loc, int32Type); - ValuePtr tmp1 = rewriter.create<LLVM::MulOp>(loc, int32Type, idZ, dimY); - ValuePtr tmp2 = rewriter.create<LLVM::AddOp>(loc, int32Type, tmp1, idY); - ValuePtr tmp3 = rewriter.create<LLVM::MulOp>(loc, int32Type, tmp2, dimX); + Value getLinearThreadIndex(Location loc, + ConversionPatternRewriter &rewriter) const { + Value dimX = rewriter.create<NVVM::BlockDimXOp>(loc, int32Type); + Value dimY = rewriter.create<NVVM::BlockDimYOp>(loc, int32Type); + Value idX = rewriter.create<NVVM::ThreadIdXOp>(loc, int32Type); + Value idY = rewriter.create<NVVM::ThreadIdYOp>(loc, int32Type); + Value idZ = rewriter.create<NVVM::ThreadIdZOp>(loc, int32Type); + Value tmp1 = rewriter.create<LLVM::MulOp>(loc, int32Type, idZ, dimY); + Value tmp2 = rewriter.create<LLVM::AddOp>(loc, int32Type, tmp1, idY); + Value tmp3 = rewriter.create<LLVM::MulOp>(loc, int32Type, tmp2, dimX); return rewriter.create<LLVM::AddOp>(loc, int32Type, tmp3, idX); } /// Returns the number of threads in the block. - ValuePtr getBlockSize(Location loc, - ConversionPatternRewriter &rewriter) const { - ValuePtr dimX = rewriter.create<NVVM::BlockDimXOp>(loc, int32Type); - ValuePtr dimY = rewriter.create<NVVM::BlockDimYOp>(loc, int32Type); - ValuePtr dimZ = rewriter.create<NVVM::BlockDimZOp>(loc, int32Type); - ValuePtr dimXY = rewriter.create<LLVM::MulOp>(loc, int32Type, dimX, dimY); + Value getBlockSize(Location loc, ConversionPatternRewriter &rewriter) const { + Value dimX = rewriter.create<NVVM::BlockDimXOp>(loc, int32Type); + Value dimY = rewriter.create<NVVM::BlockDimYOp>(loc, int32Type); + Value dimZ = rewriter.create<NVVM::BlockDimZOp>(loc, int32Type); + Value dimXY = rewriter.create<LLVM::MulOp>(loc, int32Type, dimX, dimY); return rewriter.create<LLVM::MulOp>(loc, int32Type, dimXY, dimZ); } /// Returns the number of warps in the block. - ValuePtr getNumWarps(Location loc, ValuePtr blockSize, - ConversionPatternRewriter &rewriter) const { + Value getNumWarps(Location loc, Value blockSize, + ConversionPatternRewriter &rewriter) const { auto warpSizeMinusOne = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(kWarpSize - 1)); auto biasedBlockSize = rewriter.create<LLVM::AddOp>( @@ -441,19 +440,19 @@ private: } /// Returns the number of active threads in the warp, not clamped to 32. - ValuePtr getActiveWidth(Location loc, ValuePtr threadIdx, ValuePtr blockSize, - ConversionPatternRewriter &rewriter) const { - ValuePtr threadIdxMask = rewriter.create<LLVM::ConstantOp>( + Value getActiveWidth(Location loc, Value threadIdx, Value blockSize, + ConversionPatternRewriter &rewriter) const { + Value threadIdxMask = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(~(kWarpSize - 1))); - ValuePtr numThreadsWithSmallerWarpId = + Value numThreadsWithSmallerWarpId = rewriter.create<LLVM::AndOp>(loc, threadIdx, threadIdxMask); return rewriter.create<LLVM::SubOp>(loc, blockSize, numThreadsWithSmallerWarpId); } /// Returns value divided by the warp size (i.e. 32). 
- ValuePtr getDivideByWarpSize(ValuePtr value, - ConversionPatternRewriter &rewriter) const { + Value getDivideByWarpSize(Value value, + ConversionPatternRewriter &rewriter) const { auto loc = value->getLoc(); auto warpSize = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(kWarpSize)); @@ -487,7 +486,7 @@ struct GPUShuffleOpLowering : public LLVMOpLowering { /// %shfl_pred = llvm.extractvalue %shfl[1 : index] : /// !llvm<"{ float, i1 }"> PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { Location loc = op->getLoc(); gpu::ShuffleOpOperandAdaptor adaptor(operands); @@ -498,24 +497,24 @@ struct GPUShuffleOpLowering : public LLVMOpLowering { auto predTy = LLVM::LLVMType::getInt1Ty(dialect); auto resultTy = LLVM::LLVMType::getStructTy(dialect, {valueTy, predTy}); - ValuePtr one = rewriter.create<LLVM::ConstantOp>( + Value one = rewriter.create<LLVM::ConstantOp>( loc, int32Type, rewriter.getI32IntegerAttr(1)); // Bit mask of active lanes: `(1 << activeWidth) - 1`. - ValuePtr activeMask = rewriter.create<LLVM::SubOp>( + Value activeMask = rewriter.create<LLVM::SubOp>( loc, int32Type, rewriter.create<LLVM::ShlOp>(loc, int32Type, one, adaptor.width()), one); // Clamp lane: `activeWidth - 1` - ValuePtr maskAndClamp = + Value maskAndClamp = rewriter.create<LLVM::SubOp>(loc, int32Type, adaptor.width(), one); auto returnValueAndIsValidAttr = rewriter.getUnitAttr(); - ValuePtr shfl = rewriter.create<NVVM::ShflBflyOp>( + Value shfl = rewriter.create<NVVM::ShflBflyOp>( loc, resultTy, activeMask, adaptor.value(), adaptor.offset(), maskAndClamp, returnValueAndIsValidAttr); - ValuePtr shflValue = rewriter.create<LLVM::ExtractValueOp>( + Value shflValue = rewriter.create<LLVM::ExtractValueOp>( loc, valueTy, shfl, rewriter.getIndexArrayAttr(0)); - ValuePtr isActiveSrcLane = rewriter.create<LLVM::ExtractValueOp>( + Value isActiveSrcLane = rewriter.create<LLVM::ExtractValueOp>( loc, predTy, shfl, rewriter.getIndexArrayAttr(1)); rewriter.replaceOp(op, {shflValue, isActiveSrcLane}); @@ -530,7 +529,7 @@ struct GPUFuncOpLowering : LLVMOpLowering { typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { assert(operands.empty() && "func op is not expected to have operands"); auto gpuFuncOp = cast<gpu::GPUFuncOp>(op); @@ -539,7 +538,7 @@ struct GPUFuncOpLowering : LLVMOpLowering { SmallVector<LLVM::GlobalOp, 3> workgroupBuffers; workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); for (auto en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { - ValuePtr attribution = en.value(); + Value attribution = en.value(); auto type = attribution->getType().dyn_cast<MemRefType>(); assert(type && type.hasStaticShape() && "unexpected type in attribution"); @@ -596,23 +595,23 @@ struct GPUFuncOpLowering : LLVMOpLowering { unsigned numProperArguments = gpuFuncOp.getNumArguments(); auto i32Type = LLVM::LLVMType::getInt32Ty(lowering.getDialect()); - ValuePtr zero = nullptr; + Value zero = nullptr; if (!workgroupBuffers.empty()) zero = rewriter.create<LLVM::ConstantOp>(loc, i32Type, rewriter.getI32IntegerAttr(0)); for (auto en : llvm::enumerate(workgroupBuffers)) { LLVM::GlobalOp global = en.value(); - ValuePtr address = rewriter.create<LLVM::AddressOfOp>(loc, global); + Value address = 
rewriter.create<LLVM::AddressOfOp>(loc, global); auto elementType = global.getType().getArrayElementType(); - ValuePtr memory = rewriter.create<LLVM::GEPOp>( + Value memory = rewriter.create<LLVM::GEPOp>( loc, elementType.getPointerTo(global.addr_space().getZExtValue()), - address, ArrayRef<ValuePtr>{zero, zero}); + address, ArrayRef<Value>{zero, zero}); // Build a memref descriptor pointing to the buffer to plug with the // existing memref infrastructure. This may use more registers than // otherwise necessary given that memref sizes are fixed, but we can try // and canonicalize that away later. - ValuePtr attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()]; + Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()]; auto type = attribution->getType().cast<MemRefType>(); auto descr = MemRefDescriptor::fromStaticShape(rewriter, loc, lowering, type, memory); @@ -624,7 +623,7 @@ struct GPUFuncOpLowering : LLVMOpLowering { gpuFuncOp.getNumWorkgroupAttributions(); auto int64Ty = LLVM::LLVMType::getInt64Ty(lowering.getDialect()); for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) { - ValuePtr attribution = en.value(); + Value attribution = en.value(); auto type = attribution->getType().cast<MemRefType>(); assert(type && type.hasStaticShape() && "unexpected type in attribution"); @@ -635,10 +634,10 @@ struct GPUFuncOpLowering : LLVMOpLowering { auto ptrType = lowering.convertType(type.getElementType()) .cast<LLVM::LLVMType>() .getPointerTo(); - ValuePtr numElements = rewriter.create<LLVM::ConstantOp>( + Value numElements = rewriter.create<LLVM::ConstantOp>( gpuFuncOp.getLoc(), int64Ty, rewriter.getI64IntegerAttr(type.getNumElements())); - ValuePtr allocated = rewriter.create<LLVM::AllocaOp>( + Value allocated = rewriter.create<LLVM::AllocaOp>( gpuFuncOp.getLoc(), ptrType, numElements, /*alignment=*/0); auto descr = MemRefDescriptor::fromStaticShape(rewriter, loc, lowering, type, allocated); @@ -666,8 +665,8 @@ struct GPUFuncOpLowering : LLVMOpLowering { !en.value().isa<UnrankedMemRefType>()) continue; - BlockArgumentPtr arg = block.getArgument(en.index()); - ValuePtr loaded = rewriter.create<LLVM::LoadOp>(loc, arg); + BlockArgument arg = block.getArgument(en.index()); + Value loaded = rewriter.create<LLVM::LoadOp>(loc, arg); rewriter.replaceUsesOfBlockArgument(arg, loaded); } } @@ -684,7 +683,7 @@ struct GPUReturnOpLowering : public LLVMOpLowering { typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, operands, ArrayRef<Block *>()); diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp index 95c46853b1f..509457d076a 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp @@ -27,7 +27,7 @@ public: using SPIRVOpLowering<loop::ForOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(loop::ForOp forOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(loop::ForOp forOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -39,7 +39,7 @@ public: using SPIRVOpLowering<SourceOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(SourceOp op, ArrayRef<ValuePtr> operands, + matchAndRewrite(SourceOp op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -56,7 
+56,7 @@ public: } PatternMatchResult - matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; private: @@ -70,7 +70,7 @@ public: using SPIRVOpLowering<ModuleOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(ModuleOp moduleOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(ModuleOp moduleOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -83,7 +83,7 @@ public: using SPIRVOpLowering<ModuleTerminatorOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(ModuleTerminatorOp terminatorOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -94,7 +94,7 @@ public: using SPIRVOpLowering<gpu::ReturnOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(gpu::ReturnOp returnOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(gpu::ReturnOp returnOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -105,7 +105,7 @@ public: //===----------------------------------------------------------------------===// PatternMatchResult -ForOpConversion::matchAndRewrite(loop::ForOp forOp, ArrayRef<ValuePtr> operands, +ForOpConversion::matchAndRewrite(loop::ForOp forOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { // loop::ForOp can be lowered to the structured control flow represented by // spirv::LoopOp by making the continue block of the spirv::LoopOp the loop @@ -126,7 +126,7 @@ ForOpConversion::matchAndRewrite(loop::ForOp forOp, ArrayRef<ValuePtr> operands, loopOp.body().getBlocks().insert(std::next(loopOp.body().begin(), 1), header); // Create the new induction variable to use. - BlockArgumentPtr newIndVar = + BlockArgument newIndVar = header->addArgument(forOperands.lowerBound()->getType()); Block *body = forOp.getBody(); @@ -157,7 +157,7 @@ ForOpConversion::matchAndRewrite(loop::ForOp forOp, ArrayRef<ValuePtr> operands, auto cmpOp = rewriter.create<spirv::SLessThanOp>( loc, rewriter.getI1Type(), newIndVar, forOperands.upperBound()); rewriter.create<spirv::BranchConditionalOp>( - loc, cmpOp, body, ArrayRef<ValuePtr>(), mergeBlock, ArrayRef<ValuePtr>()); + loc, cmpOp, body, ArrayRef<Value>(), mergeBlock, ArrayRef<Value>()); // Generate instructions to increment the step of the induction variable and // branch to the header. @@ -165,7 +165,7 @@ ForOpConversion::matchAndRewrite(loop::ForOp forOp, ArrayRef<ValuePtr> operands, rewriter.setInsertionPointToEnd(continueBlock); // Add the step to the induction variable and branch to the header. 
- ValuePtr updatedIndVar = rewriter.create<spirv::IAddOp>( + Value updatedIndVar = rewriter.create<spirv::IAddOp>( loc, newIndVar->getType(), newIndVar, forOperands.step()); rewriter.create<spirv::BranchOp>(loc, header, updatedIndVar); @@ -179,7 +179,7 @@ ForOpConversion::matchAndRewrite(loop::ForOp forOp, ArrayRef<ValuePtr> operands, template <typename SourceOp, spirv::BuiltIn builtin> PatternMatchResult LaunchConfigConversion<SourceOp, builtin>::matchAndRewrite( - SourceOp op, ArrayRef<ValuePtr> operands, + SourceOp op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { auto dimAttr = op.getOperation()->template getAttrOfType<StringAttr>("dimension"); @@ -258,7 +258,7 @@ lowerAsEntryFunction(gpu::GPUFuncOp funcOp, SPIRVTypeConverter &typeConverter, PatternMatchResult KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp, - ArrayRef<ValuePtr> operands, + ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { if (!gpu::GPUDialect::isKernel(funcOp)) { return matchFailure(); @@ -288,7 +288,7 @@ KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp, //===----------------------------------------------------------------------===// PatternMatchResult KernelModuleConversion::matchAndRewrite( - ModuleOp moduleOp, ArrayRef<ValuePtr> operands, + ModuleOp moduleOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { if (!moduleOp.getAttrOfType<UnitAttr>( gpu::GPUDialect::getKernelModuleAttrName())) { @@ -318,7 +318,7 @@ PatternMatchResult KernelModuleConversion::matchAndRewrite( //===----------------------------------------------------------------------===// PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite( - ModuleTerminatorOp terminatorOp, ArrayRef<ValuePtr> operands, + ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { rewriter.replaceOpWithNewOp<spirv::ModuleEndOp>(terminatorOp); return matchSuccess(); @@ -329,7 +329,7 @@ PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite( //===----------------------------------------------------------------------===// PatternMatchResult GPUReturnOpConversion::matchAndRewrite( - gpu::ReturnOp returnOp, ArrayRef<ValuePtr> operands, + gpu::ReturnOp returnOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { if (!operands.empty()) return matchFailure(); diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp index 1b70df6f8bd..2a034fd15c5 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp +++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp @@ -111,23 +111,21 @@ public: BaseViewConversionHelper(Type type) : d(MemRefDescriptor::undef(rewriter(), loc(), type)) {} - BaseViewConversionHelper(ValuePtr v) : d(v) {} + BaseViewConversionHelper(Value v) : d(v) {} /// Wrappers around MemRefDescriptor that use EDSC builder and location. 
- ValuePtr allocatedPtr() { return d.allocatedPtr(rewriter(), loc()); } - void setAllocatedPtr(ValuePtr v) { d.setAllocatedPtr(rewriter(), loc(), v); } - ValuePtr alignedPtr() { return d.alignedPtr(rewriter(), loc()); } - void setAlignedPtr(ValuePtr v) { d.setAlignedPtr(rewriter(), loc(), v); } - ValuePtr offset() { return d.offset(rewriter(), loc()); } - void setOffset(ValuePtr v) { d.setOffset(rewriter(), loc(), v); } - ValuePtr size(unsigned i) { return d.size(rewriter(), loc(), i); } - void setSize(unsigned i, ValuePtr v) { d.setSize(rewriter(), loc(), i, v); } - ValuePtr stride(unsigned i) { return d.stride(rewriter(), loc(), i); } - void setStride(unsigned i, ValuePtr v) { - d.setStride(rewriter(), loc(), i, v); - } - - operator ValuePtr() { return d; } + Value allocatedPtr() { return d.allocatedPtr(rewriter(), loc()); } + void setAllocatedPtr(Value v) { d.setAllocatedPtr(rewriter(), loc(), v); } + Value alignedPtr() { return d.alignedPtr(rewriter(), loc()); } + void setAlignedPtr(Value v) { d.setAlignedPtr(rewriter(), loc(), v); } + Value offset() { return d.offset(rewriter(), loc()); } + void setOffset(Value v) { d.setOffset(rewriter(), loc(), v); } + Value size(unsigned i) { return d.size(rewriter(), loc(), i); } + void setSize(unsigned i, Value v) { d.setSize(rewriter(), loc(), i, v); } + Value stride(unsigned i) { return d.stride(rewriter(), loc(), i); } + void setStride(unsigned i, Value v) { d.setStride(rewriter(), loc(), i, v); } + + operator Value() { return d; } private: OpBuilder &rewriter() { return ScopedContext::getBuilder(); } @@ -144,7 +142,7 @@ public: : LLVMOpLowering(RangeOp::getOperationName(), context, lowering_) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto rangeOp = cast<RangeOp>(op); auto rangeDescriptorTy = @@ -154,7 +152,7 @@ public: // Fill in an aggregate value of the descriptor. RangeOpOperandAdaptor adaptor(operands); - ValuePtr desc = llvm_undef(rangeDescriptorTy); + Value desc = llvm_undef(rangeDescriptorTy); desc = insertvalue(desc, adaptor.min(), rewriter.getI64ArrayAttr(0)); desc = insertvalue(desc, adaptor.max(), rewriter.getI64ArrayAttr(1)); desc = insertvalue(desc, adaptor.step(), rewriter.getI64ArrayAttr(2)); @@ -177,7 +175,7 @@ public: : LLVMOpLowering(SliceOp::getOperationName(), context, lowering_) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { edsc::ScopedContext context(rewriter, op->getLoc()); SliceOpOperandAdaptor adaptor(operands); @@ -191,7 +189,7 @@ public: BaseViewConversionHelper desc(lowering.convertType(sliceOp.getViewType())); // TODO(ntv): extract sizes and emit asserts. - SmallVector<ValuePtr, 4> strides(memRefType.getRank()); + SmallVector<Value, 4> strides(memRefType.getRank()); for (int i = 0, e = memRefType.getRank(); i < e; ++i) strides[i] = baseDesc.stride(i); @@ -200,10 +198,10 @@ public: }; // Compute base offset. 
- ValuePtr baseOffset = baseDesc.offset(); + Value baseOffset = baseDesc.offset(); for (int i = 0, e = memRefType.getRank(); i < e; ++i) { - ValuePtr indexing = adaptor.indexings()[i]; - ValuePtr min = indexing; + Value indexing = adaptor.indexings()[i]; + Value min = indexing; if (sliceOp.indexing(i)->getType().isa<RangeType>()) min = extractvalue(int64Ty, indexing, pos(0)); baseOffset = add(baseOffset, mul(min, strides[i])); @@ -220,29 +218,29 @@ public: if (sliceOp.getViewType().getRank() == 0) return rewriter.replaceOp(op, {desc}), matchSuccess(); - ValuePtr zero = + Value zero = constant(int64Ty, rewriter.getIntegerAttr(rewriter.getIndexType(), 0)); // Compute and insert view sizes (max - min along the range) and strides. // Skip the non-range operands as they will be projected away from the view. int numNewDims = 0; for (auto en : llvm::enumerate(sliceOp.indexings())) { - ValuePtr indexing = en.value(); + Value indexing = en.value(); if (indexing->getType().isa<RangeType>()) { int rank = en.index(); - ValuePtr rangeDescriptor = adaptor.indexings()[rank]; - ValuePtr min = extractvalue(int64Ty, rangeDescriptor, pos(0)); - ValuePtr max = extractvalue(int64Ty, rangeDescriptor, pos(1)); - ValuePtr step = extractvalue(int64Ty, rangeDescriptor, pos(2)); - ValuePtr baseSize = baseDesc.size(rank); + Value rangeDescriptor = adaptor.indexings()[rank]; + Value min = extractvalue(int64Ty, rangeDescriptor, pos(0)); + Value max = extractvalue(int64Ty, rangeDescriptor, pos(1)); + Value step = extractvalue(int64Ty, rangeDescriptor, pos(2)); + Value baseSize = baseDesc.size(rank); // Bound upper by base view upper bound. max = llvm_select(llvm_icmp(ICmpPredicate::slt, max, baseSize), max, baseSize); - ValuePtr size = sub(max, min); + Value size = sub(max, min); // Bound lower by zero. size = llvm_select(llvm_icmp(ICmpPredicate::slt, size, zero), zero, size); - ValuePtr stride = mul(strides[rank], step); + Value stride = mul(strides[rank], step); desc.setSize(numNewDims, size); desc.setStride(numNewDims, stride); ++numNewDims; @@ -268,7 +266,7 @@ public: : LLVMOpLowering(TransposeOp::getOperationName(), context, lowering_) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { // Initialize the common boilerplate and alloca at the top of the FuncOp. edsc::ScopedContext context(rewriter, op->getLoc()); @@ -311,7 +309,7 @@ public: : LLVMOpLowering(YieldOp::getOperationName(), context, lowering_) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, operands); return matchSuccess(); @@ -446,7 +444,7 @@ public: op.getLoc(), rewriter.getIntegerAttr(rewriter.getIndexType(), 0)); auto indexedGenericOp = cast<IndexedGenericOp>(op); auto numLoops = indexedGenericOp.getNumLoops(); - SmallVector<ValuePtr, 4> operands; + SmallVector<Value, 4> operands; operands.reserve(numLoops + op.getNumOperands()); for (unsigned i = 0; i < numLoops; ++i) { operands.push_back(zero); @@ -470,7 +468,7 @@ public: PatternMatchResult matchAndRewrite(CopyOp op, PatternRewriter &rewriter) const override { - ValuePtr in = op.input(), out = op.output(); + Value in = op.input(), out = op.output(); // If either inputPerm or outputPerm are non-identities, insert transposes. 
auto inputPerm = op.inputPermutation(); diff --git a/mlir/lib/Conversion/LoopToStandard/ConvertLoopToStandard.cpp b/mlir/lib/Conversion/LoopToStandard/ConvertLoopToStandard.cpp index 59dac73de9c..b257e9b482b 100644 --- a/mlir/lib/Conversion/LoopToStandard/ConvertLoopToStandard.cpp +++ b/mlir/lib/Conversion/LoopToStandard/ConvertLoopToStandard.cpp @@ -187,8 +187,8 @@ ForLowering::matchAndRewrite(ForOp forOp, PatternRewriter &rewriter) const { // Compute loop bounds before branching to the condition. rewriter.setInsertionPointToEnd(initBlock); - ValuePtr lowerBound = forOp.lowerBound(); - ValuePtr upperBound = forOp.upperBound(); + Value lowerBound = forOp.lowerBound(); + Value upperBound = forOp.upperBound(); if (!lowerBound || !upperBound) return matchFailure(); rewriter.create<BranchOp>(loc, conditionBlock, lowerBound); @@ -199,8 +199,7 @@ ForLowering::matchAndRewrite(ForOp forOp, PatternRewriter &rewriter) const { rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, iv, upperBound); rewriter.create<CondBranchOp>(loc, comparison, firstBodyBlock, - ArrayRef<ValuePtr>(), endBlock, - ArrayRef<ValuePtr>()); + ArrayRef<Value>(), endBlock, ArrayRef<Value>()); // Ok, we're done! rewriter.eraseOp(forOp); return matchSuccess(); @@ -239,8 +238,8 @@ IfLowering::matchAndRewrite(IfOp ifOp, PatternRewriter &rewriter) const { rewriter.setInsertionPointToEnd(condBlock); rewriter.create<CondBranchOp>(loc, ifOp.condition(), thenBlock, - /*trueArgs=*/ArrayRef<ValuePtr>(), elseBlock, - /*falseArgs=*/ArrayRef<ValuePtr>()); + /*trueArgs=*/ArrayRef<Value>(), elseBlock, + /*falseArgs=*/ArrayRef<Value>()); // Ok, we're done! rewriter.eraseOp(ifOp); diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp index 24bb8ffc462..e500d10983c 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -34,7 +34,7 @@ using namespace mlir::loop; using llvm::seq; // Extract an indexed value from KernelDim3. -static ValuePtr getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) { +static Value getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) { switch (pos) { case 0: return dim3.x; @@ -52,8 +52,8 @@ static ValuePtr getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) { static Operation::operand_range getLowerBoundOperands(AffineForOp forOp) { return forOp.getLowerBoundOperands(); } -static SmallVector<ValuePtr, 1> getLowerBoundOperands(ForOp forOp) { - SmallVector<ValuePtr, 1> bounds(1, forOp.lowerBound()); +static SmallVector<Value, 1> getLowerBoundOperands(ForOp forOp) { + SmallVector<Value, 1> bounds(1, forOp.lowerBound()); return bounds; } @@ -61,35 +61,33 @@ static SmallVector<ValuePtr, 1> getLowerBoundOperands(ForOp forOp) { static Operation::operand_range getUpperBoundOperands(AffineForOp forOp) { return forOp.getUpperBoundOperands(); } -static SmallVector<ValuePtr, 1> getUpperBoundOperands(ForOp forOp) { - SmallVector<ValuePtr, 1> bounds(1, forOp.upperBound()); +static SmallVector<Value, 1> getUpperBoundOperands(ForOp forOp) { + SmallVector<Value, 1> bounds(1, forOp.upperBound()); return bounds; } // Get a Value that corresponds to the loop step. If the step is an attribute, // materialize a corresponding constant using builder. 
-static ValuePtr getOrCreateStep(AffineForOp forOp, OpBuilder &builder) { +static Value getOrCreateStep(AffineForOp forOp, OpBuilder &builder) { return builder.create<ConstantIndexOp>(forOp.getLoc(), forOp.getStep()); } -static ValuePtr getOrCreateStep(ForOp forOp, OpBuilder &) { - return forOp.step(); -} +static Value getOrCreateStep(ForOp forOp, OpBuilder &) { return forOp.step(); } // Get a Value for the loop lower bound. If the value requires computation, // materialize the instructions using builder. -static ValuePtr getOrEmitLowerBound(AffineForOp forOp, OpBuilder &builder) { +static Value getOrEmitLowerBound(AffineForOp forOp, OpBuilder &builder) { return lowerAffineLowerBound(forOp, builder); } -static ValuePtr getOrEmitLowerBound(ForOp forOp, OpBuilder &) { +static Value getOrEmitLowerBound(ForOp forOp, OpBuilder &) { return forOp.lowerBound(); } // Get a Value for the loop upper bound. If the value requires computation, // materialize the instructions using builder. -static ValuePtr getOrEmitUpperBound(AffineForOp forOp, OpBuilder &builder) { +static Value getOrEmitUpperBound(AffineForOp forOp, OpBuilder &builder) { return lowerAffineUpperBound(forOp, builder); } -static ValuePtr getOrEmitUpperBound(ForOp forOp, OpBuilder &) { +static Value getOrEmitUpperBound(ForOp forOp, OpBuilder &) { return forOp.upperBound(); } @@ -205,18 +203,18 @@ struct LoopToGpuConverter { unsigned numThreadDims); // Ranges of the loops mapped to blocks or threads. - SmallVector<ValuePtr, 6> dims; + SmallVector<Value, 6> dims; // Lower bounds of the loops mapped to blocks or threads. - SmallVector<ValuePtr, 6> lbs; + SmallVector<Value, 6> lbs; // Induction variables of the loops mapped to blocks or threads. - SmallVector<ValuePtr, 6> ivs; + SmallVector<Value, 6> ivs; // Steps of the loops mapped to blocks or threads. - SmallVector<ValuePtr, 6> steps; + SmallVector<Value, 6> steps; }; } // namespace // Return true if the value is obviously a constant "one". -static bool isConstantOne(ValuePtr value) { +static bool isConstantOne(Value value) { if (auto def = dyn_cast_or_null<ConstantIndexOp>(value->getDefiningOp())) return def.getValue() == 1; return false; @@ -237,15 +235,15 @@ Optional<OpTy> LoopToGpuConverter::collectBounds(OpTy forOp, steps.reserve(numLoops); OpTy currentLoop = forOp; for (unsigned i = 0; i < numLoops; ++i) { - ValuePtr lowerBound = getOrEmitLowerBound(currentLoop, builder); - ValuePtr upperBound = getOrEmitUpperBound(currentLoop, builder); + Value lowerBound = getOrEmitLowerBound(currentLoop, builder); + Value upperBound = getOrEmitUpperBound(currentLoop, builder); if (!lowerBound || !upperBound) { return llvm::None; } - ValuePtr range = + Value range = builder.create<SubIOp>(currentLoop.getLoc(), upperBound, lowerBound); - ValuePtr step = getOrCreateStep(currentLoop, builder); + Value step = getOrCreateStep(currentLoop, builder); if (!isConstantOne(step)) range = builder.create<SignedDivIOp>(currentLoop.getLoc(), range, step); dims.push_back(range); @@ -267,8 +265,8 @@ Optional<OpTy> LoopToGpuConverter::collectBounds(OpTy forOp, /// `nids`. The innermost loop is mapped to the x-dimension, followed by the /// next innermost loop to y-dimension, followed by z-dimension. 
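As a side note, the per-dimension extent derived in collectBounds above is (upperBound - lowerBound), divided by the step when the step is not the constant one; for bounds that divide evenly this equals the trip count. A standalone sketch with made-up values (not part of this change):

#include <cassert>
#include <cstdint>

// Extent of one mapped loop dimension: range = ub - lb, divided by the step
// when the step is not the constant 1 (SignedDivIOp in the emitted IR).
int64_t loopExtent(int64_t lb, int64_t ub, int64_t step) {
  int64_t range = ub - lb;
  if (step != 1)
    range /= step;
  return range;
}

int main() {
  // loop.for %i = 2 to 18 step 4 visits {2, 6, 10, 14}: extent 4.
  assert(loopExtent(2, 18, 4) == 4);
  assert(loopExtent(0, 128, 1) == 128);
}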
template <typename OpTy> -OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<ValuePtr> ids, - ArrayRef<ValuePtr> nids) { +OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<Value> ids, + ArrayRef<Value> nids) { auto nDims = ids.size(); assert(nDims == nids.size()); for (auto dim : llvm::seq<unsigned>(0, nDims)) { @@ -288,11 +286,11 @@ OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<ValuePtr> ids, /// each workgroup/workitem and number of workgroup/workitems along a dimension /// of the launch into a container. void packIdAndNumId(gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids, - unsigned nDims, SmallVectorImpl<ValuePtr> &ids, - SmallVectorImpl<ValuePtr> &nids) { + unsigned nDims, SmallVectorImpl<Value> &ids, + SmallVectorImpl<Value> &nids) { assert(nDims <= 3 && "invalid number of launch dimensions"); - SmallVector<ValuePtr, 3> allIds = {kernelIds.z, kernelIds.y, kernelIds.x}; - SmallVector<ValuePtr, 3> allNids = {kernelNids.z, kernelNids.y, kernelNids.x}; + SmallVector<Value, 3> allIds = {kernelIds.z, kernelIds.y, kernelIds.x}; + SmallVector<Value, 3> allNids = {kernelNids.z, kernelNids.y, kernelNids.x}; ids.clear(); ids.append(std::next(allIds.begin(), allIds.size() - nDims), allIds.end()); nids.clear(); @@ -310,7 +308,7 @@ LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp, auto returnOp = builder.create<gpu::ReturnOp>(launchOp.getLoc()); rootForOp.getOperation()->moveBefore(returnOp); - SmallVector<ValuePtr, 3> workgroupID, numWorkGroups; + SmallVector<Value, 3> workgroupID, numWorkGroups; packIdAndNumId(launchOp.getBlockIds(), launchOp.getGridSize(), numBlockDims, workgroupID, numWorkGroups); @@ -326,7 +324,7 @@ LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp, } } - SmallVector<ValuePtr, 3> workItemID, workGroupSize; + SmallVector<Value, 3> workItemID, workGroupSize; packIdAndNumId(launchOp.getThreadIds(), launchOp.getBlockSize(), numThreadDims, workItemID, workGroupSize); for (auto &loopOp : threadRootForOps) { @@ -339,18 +337,17 @@ LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp, // Convert the computation rooted at the `rootForOp`, into a GPU kernel with the // given workgroup size and number of workgroups. template <typename OpTy> -LogicalResult createLaunchFromOp(OpTy rootForOp, - ArrayRef<ValuePtr> numWorkGroups, - ArrayRef<ValuePtr> workGroupSizes) { +LogicalResult createLaunchFromOp(OpTy rootForOp, ArrayRef<Value> numWorkGroups, + ArrayRef<Value> workGroupSizes) { OpBuilder builder(rootForOp.getOperation()); if (numWorkGroups.size() > 3) { return rootForOp.emitError("invalid ") << numWorkGroups.size() << "-D workgroup specification"; } auto loc = rootForOp.getLoc(); - ValuePtr one = builder.create<ConstantOp>( + Value one = builder.create<ConstantOp>( loc, builder.getIntegerAttr(builder.getIndexType(), 1)); - SmallVector<ValuePtr, 3> numWorkGroups3D(3, one), workGroupSize3D(3, one); + SmallVector<Value, 3> numWorkGroups3D(3, one), workGroupSize3D(3, one); for (auto numWorkGroup : enumerate(numWorkGroups)) { numWorkGroups3D[numWorkGroup.index()] = numWorkGroup.value(); } @@ -360,7 +357,7 @@ LogicalResult createLaunchFromOp(OpTy rootForOp, // Get the values used within the region of the rootForOp but defined above // it. - llvm::SetVector<ValuePtr> valuesToForwardSet; + llvm::SetVector<Value> valuesToForwardSet; getUsedValuesDefinedAbove(rootForOp.region(), rootForOp.region(), valuesToForwardSet); // Also add the values used for the lb, ub, and step of the rootForOp. 
@@ -380,8 +377,8 @@ LogicalResult createLaunchFromOp(OpTy rootForOp, // defined outside. They all are replaced with kernel arguments. for (const auto &pair : llvm::zip_first(valuesToForward, launchOp.getKernelArguments())) { - ValuePtr from = std::get<0>(pair); - ValuePtr to = std::get<1>(pair); + Value from = std::get<0>(pair); + Value to = std::get<1>(pair); replaceAllUsesInRegionWith(from, to, launchOp.body()); } return success(); @@ -401,23 +398,22 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, OpBuilder builder(rootForOp.getOperation()); // Prepare the grid and block sizes for the launch operation. If there is // no loop mapped to a specific dimension, use constant "1" as its size. - ValuePtr constOne = - (numBlockDims < 3 || numThreadDims < 3) - ? builder.create<ConstantIndexOp>(rootForOp.getLoc(), 1) - : nullptr; - ValuePtr gridSizeX = dims[0]; - ValuePtr gridSizeY = numBlockDims > 1 ? dims[1] : constOne; - ValuePtr gridSizeZ = numBlockDims > 2 ? dims[2] : constOne; - ValuePtr blockSizeX = dims[numBlockDims]; - ValuePtr blockSizeY = numThreadDims > 1 ? dims[numBlockDims + 1] : constOne; - ValuePtr blockSizeZ = numThreadDims > 2 ? dims[numBlockDims + 2] : constOne; + Value constOne = (numBlockDims < 3 || numThreadDims < 3) + ? builder.create<ConstantIndexOp>(rootForOp.getLoc(), 1) + : nullptr; + Value gridSizeX = dims[0]; + Value gridSizeY = numBlockDims > 1 ? dims[1] : constOne; + Value gridSizeZ = numBlockDims > 2 ? dims[2] : constOne; + Value blockSizeX = dims[numBlockDims]; + Value blockSizeY = numThreadDims > 1 ? dims[numBlockDims + 1] : constOne; + Value blockSizeZ = numThreadDims > 2 ? dims[numBlockDims + 2] : constOne; // Create a launch op and move the body region of the innermost loop to the // launch op. Pass the values defined outside the outermost loop and used // inside the innermost loop and loop lower bounds as kernel data arguments. // Still assuming perfect nesting so there are no values other than induction // variables that are defined in one loop and used in deeper loops. - llvm::SetVector<ValuePtr> valuesToForwardSet; + llvm::SetVector<Value> valuesToForwardSet; getUsedValuesDefinedAbove(innermostForOp.region(), rootForOp.region(), valuesToForwardSet); auto valuesToForward = valuesToForwardSet.takeVector(); @@ -451,15 +447,15 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, originallyForwardedValues); auto stepArgumentIt = std::next(lbArgumentIt, lbs.size()); for (auto en : llvm::enumerate(ivs)) { - ValuePtr id = + Value id = en.index() < numBlockDims ? getDim3Value(launchOp.getBlockIds(), en.index()) : getDim3Value(launchOp.getThreadIds(), en.index() - numBlockDims); - ValuePtr step = steps[en.index()]; + Value step = steps[en.index()]; if (!isConstantOne(step)) id = builder.create<MulIOp>(rootForOp.getLoc(), step, id); - ValuePtr ivReplacement = + Value ivReplacement = builder.create<AddIOp>(rootForOp.getLoc(), *lbArgumentIt, id); en.value()->replaceAllUsesWith(ivReplacement); replaceAllUsesInRegionWith(steps[en.index()], *stepArgumentIt, @@ -473,8 +469,8 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, // trailing positions, make sure we don't touch those. 
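The replacement built above recovers the original induction variable from the hardware id as iv = lowerBound + id * step, with the multiplication omitted for unit steps. A standalone sketch, values made up (not part of this change):

#include <cassert>
#include <cstdint>

// iv = lb + id * step; the multiply is skipped when the step is 1.
int64_t originalIV(int64_t lb, int64_t step, int64_t id) {
  if (step != 1)
    id = step * id;
  return lb + id;
}

int main() {
  // For loop.for %i = 2 to 18 step 4, hardware id 3 executes i = 14.
  assert(originalIV(2, 4, 3) == 14);
}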
for (const auto &pair : llvm::zip_first(valuesToForward, launchOp.getKernelArguments())) { - ValuePtr from = std::get<0>(pair); - ValuePtr to = std::get<1>(pair); + Value from = std::get<0>(pair); + Value to = std::get<1>(pair); replaceAllUsesInRegionWith(from, to, launchOp.body()); } @@ -504,8 +500,8 @@ static LogicalResult convertLoopNestToGPULaunch(OpTy forOp, // nested. The workgroup size and num workgroups is provided as input template <typename OpTy> static LogicalResult convertLoopToGPULaunch(OpTy forOp, - ArrayRef<ValuePtr> numWorkGroups, - ArrayRef<ValuePtr> workGroupSize) { + ArrayRef<Value> numWorkGroups, + ArrayRef<Value> workGroupSize) { if (failed(checkLoopOpMappable(forOp, numWorkGroups.size(), workGroupSize.size()))) { return failure(); @@ -526,7 +522,7 @@ LogicalResult mlir::convertLoopNestToGPULaunch(ForOp forOp, } LogicalResult mlir::convertLoopToGPULaunch(loop::ForOp forOp, - ArrayRef<ValuePtr> numWorkGroups, - ArrayRef<ValuePtr> workGroupSizes) { + ArrayRef<Value> numWorkGroups, + ArrayRef<Value> workGroupSizes) { return ::convertLoopToGPULaunch(forOp, numWorkGroups, workGroupSizes); } diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp index 4dfd26a4392..c3bbf274818 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp @@ -89,7 +89,7 @@ struct ImperfectlyNestedForLoopMapper // pass is only used for testing. FuncOp funcOp = getFunction(); OpBuilder builder(funcOp.getOperation()->getRegion(0)); - SmallVector<ValuePtr, 3> numWorkGroupsVal, workGroupSizeVal; + SmallVector<Value, 3> numWorkGroupsVal, workGroupSizeVal; for (auto val : numWorkGroups) { auto constOp = builder.create<ConstantOp>( funcOp.getLoc(), builder.getIntegerAttr(builder.getIndexType(), val)); diff --git a/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp index 160678efe9f..0c96cc5e9c7 100644 --- a/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp @@ -247,20 +247,20 @@ LLVMOpLowering::LLVMOpLowering(StringRef rootOpName, MLIRContext *context, /*============================================================================*/ /* StructBuilder implementation */ /*============================================================================*/ -StructBuilder::StructBuilder(ValuePtr v) : value(v) { +StructBuilder::StructBuilder(Value v) : value(v) { assert(value != nullptr && "value cannot be null"); structType = value->getType().cast<LLVM::LLVMType>(); } -ValuePtr StructBuilder::extractPtr(OpBuilder &builder, Location loc, - unsigned pos) { +Value StructBuilder::extractPtr(OpBuilder &builder, Location loc, + unsigned pos) { Type type = structType.cast<LLVM::LLVMType>().getStructElementType(pos); return builder.create<LLVM::ExtractValueOp>(loc, type, value, builder.getI64ArrayAttr(pos)); } void StructBuilder::setPtr(OpBuilder &builder, Location loc, unsigned pos, - ValuePtr ptr) { + Value ptr) { value = builder.create<LLVM::InsertValueOp>(loc, structType, value, ptr, builder.getI64ArrayAttr(pos)); } @@ -269,7 +269,7 @@ void StructBuilder::setPtr(OpBuilder &builder, Location loc, unsigned pos, /*============================================================================*/ /// Construct a helper for the given descriptor value. 
-MemRefDescriptor::MemRefDescriptor(ValuePtr descriptor) +MemRefDescriptor::MemRefDescriptor(Value descriptor) : StructBuilder(descriptor) { assert(value != nullptr && "value cannot be null"); indexType = value->getType().cast<LLVM::LLVMType>().getStructElementType( @@ -280,7 +280,7 @@ MemRefDescriptor::MemRefDescriptor(ValuePtr descriptor) MemRefDescriptor MemRefDescriptor::undef(OpBuilder &builder, Location loc, Type descriptorType) { - ValuePtr descriptor = + Value descriptor = builder.create<LLVM::UndefOp>(loc, descriptorType.cast<LLVM::LLVMType>()); return MemRefDescriptor(descriptor); } @@ -291,7 +291,7 @@ MemRefDescriptor MemRefDescriptor::undef(OpBuilder &builder, Location loc, MemRefDescriptor MemRefDescriptor::fromStaticShape(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, - MemRefType type, ValuePtr memory) { + MemRefType type, Value memory) { assert(type.hasStaticShape() && "unexpected dynamic shape"); assert(type.getAffineMaps().empty() && "unexpected layout map"); @@ -316,37 +316,37 @@ MemRefDescriptor::fromStaticShape(OpBuilder &builder, Location loc, } /// Builds IR extracting the allocated pointer from the descriptor. -ValuePtr MemRefDescriptor::allocatedPtr(OpBuilder &builder, Location loc) { +Value MemRefDescriptor::allocatedPtr(OpBuilder &builder, Location loc) { return extractPtr(builder, loc, kAllocatedPtrPosInMemRefDescriptor); } /// Builds IR inserting the allocated pointer into the descriptor. void MemRefDescriptor::setAllocatedPtr(OpBuilder &builder, Location loc, - ValuePtr ptr) { + Value ptr) { setPtr(builder, loc, kAllocatedPtrPosInMemRefDescriptor, ptr); } /// Builds IR extracting the aligned pointer from the descriptor. -ValuePtr MemRefDescriptor::alignedPtr(OpBuilder &builder, Location loc) { +Value MemRefDescriptor::alignedPtr(OpBuilder &builder, Location loc) { return extractPtr(builder, loc, kAlignedPtrPosInMemRefDescriptor); } /// Builds IR inserting the aligned pointer into the descriptor. void MemRefDescriptor::setAlignedPtr(OpBuilder &builder, Location loc, - ValuePtr ptr) { + Value ptr) { setPtr(builder, loc, kAlignedPtrPosInMemRefDescriptor, ptr); } // Creates a constant Op producing a value of `resultType` from an index-typed // integer attribute. -static ValuePtr createIndexAttrConstant(OpBuilder &builder, Location loc, - Type resultType, int64_t value) { +static Value createIndexAttrConstant(OpBuilder &builder, Location loc, + Type resultType, int64_t value) { return builder.create<LLVM::ConstantOp>( loc, resultType, builder.getIntegerAttr(builder.getIndexType(), value)); } /// Builds IR extracting the offset from the descriptor. -ValuePtr MemRefDescriptor::offset(OpBuilder &builder, Location loc) { +Value MemRefDescriptor::offset(OpBuilder &builder, Location loc) { return builder.create<LLVM::ExtractValueOp>( loc, indexType, value, builder.getI64ArrayAttr(kOffsetPosInMemRefDescriptor)); @@ -354,7 +354,7 @@ ValuePtr MemRefDescriptor::offset(OpBuilder &builder, Location loc) { /// Builds IR inserting the offset into the descriptor. void MemRefDescriptor::setOffset(OpBuilder &builder, Location loc, - ValuePtr offset) { + Value offset) { value = builder.create<LLVM::InsertValueOp>( loc, structType, value, offset, builder.getI64ArrayAttr(kOffsetPosInMemRefDescriptor)); @@ -368,8 +368,7 @@ void MemRefDescriptor::setConstantOffset(OpBuilder &builder, Location loc, } /// Builds IR extracting the pos-th size from the descriptor. 
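The k...PosInMemRefDescriptor constants used here index into the LLVM struct that a ranked memref converts to. Roughly, and assuming a 64-bit index type, the layout corresponds to the following C++ view (field names are illustrative; only the field order is implied by the constants, and this sketch is not part of this change):

#include <cstdint>

// Illustrative C++ view of the descriptor for a ranked memref; the real value
// is an LLVM dialect struct assembled field by field with InsertValueOp.
template <typename T, int Rank>
struct MemRefDescriptorLayout {
  T *allocated;          // kAllocatedPtrPosInMemRefDescriptor (malloc'ed ptr)
  T *aligned;            // kAlignedPtrPosInMemRefDescriptor   (payload ptr)
  int64_t offset;        // kOffsetPosInMemRefDescriptor
  int64_t sizes[Rank];   // kSizePosInMemRefDescriptor
  int64_t strides[Rank]; // kStridePosInMemRefDescriptor
};

int main() {
  MemRefDescriptorLayout<float, 2> d{}; // e.g. for memref<?x?xf32>
  (void)d;
}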
-ValuePtr MemRefDescriptor::size(OpBuilder &builder, Location loc, - unsigned pos) { +Value MemRefDescriptor::size(OpBuilder &builder, Location loc, unsigned pos) { return builder.create<LLVM::ExtractValueOp>( loc, indexType, value, builder.getI64ArrayAttr({kSizePosInMemRefDescriptor, pos})); @@ -377,7 +376,7 @@ ValuePtr MemRefDescriptor::size(OpBuilder &builder, Location loc, /// Builds IR inserting the pos-th size into the descriptor void MemRefDescriptor::setSize(OpBuilder &builder, Location loc, unsigned pos, - ValuePtr size) { + Value size) { value = builder.create<LLVM::InsertValueOp>( loc, structType, value, size, builder.getI64ArrayAttr({kSizePosInMemRefDescriptor, pos})); @@ -391,8 +390,7 @@ void MemRefDescriptor::setConstantSize(OpBuilder &builder, Location loc, } /// Builds IR extracting the pos-th size from the descriptor. -ValuePtr MemRefDescriptor::stride(OpBuilder &builder, Location loc, - unsigned pos) { +Value MemRefDescriptor::stride(OpBuilder &builder, Location loc, unsigned pos) { return builder.create<LLVM::ExtractValueOp>( loc, indexType, value, builder.getI64ArrayAttr({kStridePosInMemRefDescriptor, pos})); @@ -400,7 +398,7 @@ ValuePtr MemRefDescriptor::stride(OpBuilder &builder, Location loc, /// Builds IR inserting the pos-th stride into the descriptor void MemRefDescriptor::setStride(OpBuilder &builder, Location loc, unsigned pos, - ValuePtr stride) { + Value stride) { value = builder.create<LLVM::InsertValueOp>( loc, structType, value, stride, builder.getI64ArrayAttr({kStridePosInMemRefDescriptor, pos})); @@ -423,30 +421,30 @@ LLVM::LLVMType MemRefDescriptor::getElementType() { /*============================================================================*/ /// Construct a helper for the given descriptor value. -UnrankedMemRefDescriptor::UnrankedMemRefDescriptor(ValuePtr descriptor) +UnrankedMemRefDescriptor::UnrankedMemRefDescriptor(Value descriptor) : StructBuilder(descriptor) {} /// Builds IR creating an `undef` value of the descriptor type. UnrankedMemRefDescriptor UnrankedMemRefDescriptor::undef(OpBuilder &builder, Location loc, Type descriptorType) { - ValuePtr descriptor = + Value descriptor = builder.create<LLVM::UndefOp>(loc, descriptorType.cast<LLVM::LLVMType>()); return UnrankedMemRefDescriptor(descriptor); } -ValuePtr UnrankedMemRefDescriptor::rank(OpBuilder &builder, Location loc) { +Value UnrankedMemRefDescriptor::rank(OpBuilder &builder, Location loc) { return extractPtr(builder, loc, kRankInUnrankedMemRefDescriptor); } void UnrankedMemRefDescriptor::setRank(OpBuilder &builder, Location loc, - ValuePtr v) { + Value v) { setPtr(builder, loc, kRankInUnrankedMemRefDescriptor, v); } -ValuePtr UnrankedMemRefDescriptor::memRefDescPtr(OpBuilder &builder, - Location loc) { +Value UnrankedMemRefDescriptor::memRefDescPtr(OpBuilder &builder, + Location loc) { return extractPtr(builder, loc, kPtrInUnrankedMemRefDescriptor); } void UnrankedMemRefDescriptor::setMemRefDescPtr(OpBuilder &builder, - Location loc, ValuePtr v) { + Location loc, Value v) { setPtr(builder, loc, kPtrInUnrankedMemRefDescriptor, v); } namespace { @@ -487,8 +485,8 @@ public: } // Create an LLVM IR pseudo-operation defining the given index constant. 
- ValuePtr createIndexConstant(ConversionPatternRewriter &builder, Location loc, - uint64_t value) const { + Value createIndexConstant(ConversionPatternRewriter &builder, Location loc, + uint64_t value) const { return createIndexAttrConstant(builder, loc, getIndexType(), value); } @@ -500,7 +498,7 @@ struct FuncOpConversion : public LLVMLegalizationPattern<FuncOp> { using LLVMLegalizationPattern<FuncOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto funcOp = cast<FuncOp>(op); FunctionType type = funcOp.getType(); @@ -548,8 +546,8 @@ struct FuncOpConversion : public LLVMLegalizationPattern<FuncOp> { Block *firstBlock = &newFuncOp.getBody().front(); rewriter.setInsertionPoint(firstBlock, firstBlock->begin()); for (unsigned idx : promotedArgIndices) { - BlockArgumentPtr arg = firstBlock->getArgument(idx); - ValuePtr loaded = rewriter.create<LLVM::LoadOp>(funcOp.getLoc(), arg); + BlockArgument arg = firstBlock->getArgument(idx); + Value loaded = rewriter.create<LLVM::LoadOp>(funcOp.getLoc(), arg); rewriter.replaceUsesOfBlockArgument(arg, loaded); } } @@ -648,7 +646,7 @@ struct OneToOneLLVMOpLowering : public LLVMLegalizationPattern<SourceOp> { // Convert the type of the result to an LLVM type, pass operands as is, // preserve attributes. PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { unsigned numResults = op->getNumResults(); @@ -672,7 +670,7 @@ struct OneToOneLLVMOpLowering : public LLVMLegalizationPattern<SourceOp> { // Otherwise, it had been converted to an operation producing a structure. // Extract individual results from the structure and return them as list. - SmallVector<ValuePtr, 4> results; + SmallVector<Value, 4> results; results.reserve(numResults); for (unsigned i = 0; i < numResults; ++i) { auto type = this->lowering.convertType(op->getResult(i)->getType()); @@ -713,7 +711,7 @@ struct NaryOpLLVMOpLowering : public LLVMLegalizationPattern<SourceOp> { // Convert the type of the result to an LLVM type, pass operands as is, // preserve attributes. PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { ValidateOpCount<SourceOp, OpCount>(); static_assert( @@ -724,7 +722,7 @@ struct NaryOpLLVMOpLowering : public LLVMLegalizationPattern<SourceOp> { "expected same operands and result type"); // Cannot convert ops if their operands are not of LLVM type. 
- for (ValuePtr operand : operands) { + for (Value operand : operands) { if (!operand || !operand->getType().isa<LLVM::LLVMType>()) return this->matchFailure(); } @@ -747,16 +745,16 @@ struct NaryOpLLVMOpLowering : public LLVMLegalizationPattern<SourceOp> { if (!llvmVectorTy || llvmArrayTy != vectorTypeInfo.llvmArrayTy) return this->matchFailure(); - ValuePtr desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayTy); + Value desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayTy); nDVectorIterate(vectorTypeInfo, rewriter, [&](ArrayAttr position) { // For this unrolled `position` corresponding to the `linearIndex`^th // element, extract operand vectors - SmallVector<ValuePtr, OpCount> extractedOperands; + SmallVector<Value, OpCount> extractedOperands; for (unsigned i = 0; i < OpCount; ++i) { extractedOperands.push_back(rewriter.create<LLVM::ExtractValueOp>( loc, llvmVectorTy, operands[i], position)); } - ValuePtr newVal = rewriter.create<TargetOp>( + Value newVal = rewriter.create<TargetOp>( loc, llvmVectorTy, extractedOperands, op->getAttrs()); desc = rewriter.create<LLVM::InsertValueOp>(loc, llvmArrayTy, desc, newVal, position); @@ -919,7 +917,7 @@ struct AllocOpLowering : public LLVMLegalizationPattern<AllocOp> { return matchSuccess(); } - void rewrite(Operation *op, ArrayRef<ValuePtr> operands, + void rewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto allocOp = cast<AllocOp>(op); @@ -928,7 +926,7 @@ struct AllocOpLowering : public LLVMLegalizationPattern<AllocOp> { // Get actual sizes of the memref as values: static sizes are constant // values and dynamic sizes are passed to 'alloc' as operands. In case of // zero-dimensional memref, assume a scalar (size 1). - SmallVector<ValuePtr, 4> sizes; + SmallVector<Value, 4> sizes; sizes.reserve(type.getRank()); unsigned i = 0; for (int64_t s : type.getShape()) @@ -938,10 +936,10 @@ struct AllocOpLowering : public LLVMLegalizationPattern<AllocOp> { sizes.push_back(createIndexConstant(rewriter, loc, 1)); // Compute the total number of memref elements. - ValuePtr cumulativeSize = sizes.front(); + Value cumulativeSize = sizes.front(); for (unsigned i = 1, e = sizes.size(); i < e; ++i) cumulativeSize = rewriter.create<LLVM::MulOp>( - loc, getIndexType(), ArrayRef<ValuePtr>{cumulativeSize, sizes[i]}); + loc, getIndexType(), ArrayRef<Value>{cumulativeSize, sizes[i]}); // Compute the size of an individual element. This emits the MLIR equivalent // of the following sizeof(...) implementation in LLVM IR: @@ -954,17 +952,17 @@ struct AllocOpLowering : public LLVMLegalizationPattern<AllocOp> { auto nullPtr = rewriter.create<LLVM::NullOp>(loc, convertedPtrType); auto one = createIndexConstant(rewriter, loc, 1); auto gep = rewriter.create<LLVM::GEPOp>(loc, convertedPtrType, - ArrayRef<ValuePtr>{nullPtr, one}); + ArrayRef<Value>{nullPtr, one}); auto elementSize = rewriter.create<LLVM::PtrToIntOp>(loc, getIndexType(), gep); cumulativeSize = rewriter.create<LLVM::MulOp>( - loc, getIndexType(), ArrayRef<ValuePtr>{cumulativeSize, elementSize}); + loc, getIndexType(), ArrayRef<Value>{cumulativeSize, elementSize}); // Allocate the underlying buffer and store a pointer to it in the MemRef // descriptor. 
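Two notes on the code that follows: the element size is obtained with the usual null-GEP/ptrtoint idiom emitted above, and when an alignment attribute is set the aligned pointer is derived from the malloc'ed one by advancing it (align - ptr % align) % align bytes (the allocation is presumably padded accordingly elsewhere in this pattern). A standalone sketch of that adjustment, values made up (not part of this change):

#include <cassert>
#include <cstdint>

// Byte adjustment used to derive the aligned pointer from the allocated one:
//   adjust = (align - ptr % align) % align
uint64_t alignmentAdjustment(uint64_t ptr, uint64_t align) {
  return (align - ptr % align) % align;
}

int main() {
  assert(alignmentAdjustment(0x1003, 16) == 13); // 0x1003 + 13 == 0x1010
  assert(alignmentAdjustment(0x1000, 16) == 0);  // already aligned
}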
- ValuePtr allocated = nullptr; + Value allocated = nullptr; int alignment = 0; - ValuePtr alignmentValue = nullptr; + Value alignmentValue = nullptr; if (auto alignAttr = allocOp.alignment()) alignment = alignAttr.getValue().getSExtValue(); @@ -1000,8 +998,8 @@ struct AllocOpLowering : public LLVMLegalizationPattern<AllocOp> { auto structElementType = lowering.convertType(elementType); auto elementPtrType = structElementType.cast<LLVM::LLVMType>().getPointerTo( type.getMemorySpace()); - ValuePtr bitcastAllocated = rewriter.create<LLVM::BitcastOp>( - loc, elementPtrType, ArrayRef<ValuePtr>(allocated)); + Value bitcastAllocated = rewriter.create<LLVM::BitcastOp>( + loc, elementPtrType, ArrayRef<Value>(allocated)); int64_t offset; SmallVector<int64_t, 4> strides; @@ -1023,22 +1021,21 @@ struct AllocOpLowering : public LLVMLegalizationPattern<AllocOp> { memRefDescriptor.setAllocatedPtr(rewriter, loc, bitcastAllocated); // Field 2: Actual aligned pointer to payload. - ValuePtr bitcastAligned = bitcastAllocated; + Value bitcastAligned = bitcastAllocated; if (!useAlloca && alignment != 0) { assert(alignmentValue); // offset = (align - (ptr % align))% align - ValuePtr intVal = rewriter.create<LLVM::PtrToIntOp>( + Value intVal = rewriter.create<LLVM::PtrToIntOp>( loc, this->getIndexType(), allocated); - ValuePtr ptrModAlign = + Value ptrModAlign = rewriter.create<LLVM::URemOp>(loc, intVal, alignmentValue); - ValuePtr subbed = + Value subbed = rewriter.create<LLVM::SubOp>(loc, alignmentValue, ptrModAlign); - ValuePtr offset = - rewriter.create<LLVM::URemOp>(loc, subbed, alignmentValue); - ValuePtr aligned = rewriter.create<LLVM::GEPOp>(loc, allocated->getType(), - allocated, offset); + Value offset = rewriter.create<LLVM::URemOp>(loc, subbed, alignmentValue); + Value aligned = rewriter.create<LLVM::GEPOp>(loc, allocated->getType(), + allocated, offset); bitcastAligned = rewriter.create<LLVM::BitcastOp>( - loc, elementPtrType, ArrayRef<ValuePtr>(aligned)); + loc, elementPtrType, ArrayRef<Value>(aligned)); } memRefDescriptor.setAlignedPtr(rewriter, loc, bitcastAligned); @@ -1053,10 +1050,10 @@ struct AllocOpLowering : public LLVMLegalizationPattern<AllocOp> { // Fields 4 and 5: Sizes and strides of the strided MemRef. // Store all sizes in the descriptor. Only dynamic sizes are passed in as // operands to AllocOp. - ValuePtr runningStride = nullptr; + Value runningStride = nullptr; // Iterate strides in reverse order, compute runningStride and strideValues. auto nStrides = strides.size(); - SmallVector<ValuePtr, 4> strideValues(nStrides, nullptr); + SmallVector<Value, 4> strideValues(nStrides, nullptr); for (auto indexedStride : llvm::enumerate(llvm::reverse(strides))) { int64_t index = nStrides - 1 - indexedStride.index(); if (strides[index] == MemRefType::getDynamicStrideOrOffset()) @@ -1093,7 +1090,7 @@ struct CallOpInterfaceLowering : public LLVMLegalizationPattern<CallOpType> { using Base = LLVMLegalizationPattern<CallOpType>; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { OperandAdaptor<CallOpType> transformed(operands); auto callOp = cast<CallOpType>(op); @@ -1131,7 +1128,7 @@ struct CallOpInterfaceLowering : public LLVMLegalizationPattern<CallOpType> { // TODO(aminim, ntv, riverriddle, zinenko): this seems like patching around // a particular interaction between MemRefType and CallOp lowering. Find a // way to avoid special casing. 
- SmallVector<ValuePtr, 4> results; + SmallVector<Value, 4> results; results.reserve(numResults); for (unsigned i = 0; i < numResults; ++i) { auto type = this->lowering.convertType(op->getResult(i)->getType()); @@ -1165,7 +1162,7 @@ struct DeallocOpLowering : public LLVMLegalizationPattern<DeallocOp> { useAlloca(useAlloca) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { if (useAlloca) return rewriter.eraseOp(op), matchSuccess(); @@ -1185,7 +1182,7 @@ struct DeallocOpLowering : public LLVMLegalizationPattern<DeallocOp> { } MemRefDescriptor memref(transformed.memref()); - ValuePtr casted = rewriter.create<LLVM::BitcastOp>( + Value casted = rewriter.create<LLVM::BitcastOp>( op->getLoc(), getVoidPtrType(), memref.allocatedPtr(rewriter, op->getLoc())); rewriter.replaceOpWithNewOp<LLVM::CallOp>( @@ -1201,7 +1198,7 @@ struct TanhOpLowering : public LLVMLegalizationPattern<TanhOp> { using LLVMLegalizationPattern<TanhOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { using LLVMFuncOpT = LLVM::LLVMFuncOp; @@ -1275,7 +1272,7 @@ struct MemRefCastOpLowering : public LLVMLegalizationPattern<MemRefCastOp> { : matchFailure(); } - void rewrite(Operation *op, ArrayRef<ValuePtr> operands, + void rewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto memRefCastOp = cast<MemRefCastOp>(op); OperandAdaptor<MemRefCastOp> transformed(operands); @@ -1316,7 +1313,7 @@ struct MemRefCastOpLowering : public LLVMLegalizationPattern<MemRefCastOp> { memRefDesc.setRank(rewriter, loc, rankVal); // d2 = InsertValueOp d1, voidptr, 1 memRefDesc.setMemRefDescPtr(rewriter, loc, voidPtr); - rewriter.replaceOp(op, (ValuePtr)memRefDesc); + rewriter.replaceOp(op, (Value)memRefDesc); } else if (srcType.isa<UnrankedMemRefType>() && dstType.isa<MemRefType>()) { // Casting from unranked type to ranked. @@ -1347,7 +1344,7 @@ struct DimOpLowering : public LLVMLegalizationPattern<DimOp> { using LLVMLegalizationPattern<DimOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto dimOp = cast<DimOp>(op); OperandAdaptor<DimOp> transformed(operands); @@ -1389,45 +1386,42 @@ struct LoadStoreOpLowering : public LLVMLegalizationPattern<Derived> { // by accumulating the running linearized value. // Note that `indices` and `allocSizes` are passed in the same order as they // appear in load/store operations and memref type declarations. 
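linearizeSubscripts below folds the subscripts row-major by keeping a running value, lin = ((i0 * s1 + i1) * s2 + i2) ..., so the leading allocation size never enters the computation. A standalone sketch with made-up values (not part of this change):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Row-major linearization with a running value; allocSizes[0] is never used.
int64_t linearize(const std::vector<int64_t> &indices,
                  const std::vector<int64_t> &sizes) {
  assert(!indices.empty() && indices.size() == sizes.size());
  int64_t lin = indices.front();
  for (std::size_t i = 1, e = sizes.size(); i < e; ++i)
    lin = lin * sizes[i] + indices[i];
  return lin;
}

int main() {
  // Element (1, 2, 3) of a 4x5x6 buffer: (1 * 5 + 2) * 6 + 3 == 45.
  assert(linearize({1, 2, 3}, {4, 5, 6}) == 45);
}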
- ValuePtr linearizeSubscripts(ConversionPatternRewriter &builder, Location loc, - ArrayRef<ValuePtr> indices, - ArrayRef<ValuePtr> allocSizes) const { + Value linearizeSubscripts(ConversionPatternRewriter &builder, Location loc, + ArrayRef<Value> indices, + ArrayRef<Value> allocSizes) const { assert(indices.size() == allocSizes.size() && "mismatching number of indices and allocation sizes"); assert(!indices.empty() && "cannot linearize a 0-dimensional access"); - ValuePtr linearized = indices.front(); + Value linearized = indices.front(); for (int i = 1, nSizes = allocSizes.size(); i < nSizes; ++i) { linearized = builder.create<LLVM::MulOp>( loc, this->getIndexType(), - ArrayRef<ValuePtr>{linearized, allocSizes[i]}); + ArrayRef<Value>{linearized, allocSizes[i]}); linearized = builder.create<LLVM::AddOp>( - loc, this->getIndexType(), - ArrayRef<ValuePtr>{linearized, indices[i]}); + loc, this->getIndexType(), ArrayRef<Value>{linearized, indices[i]}); } return linearized; } // This is a strided getElementPtr variant that linearizes subscripts as: // `base_offset + index_0 * stride_0 + ... + index_n * stride_n`. - ValuePtr getStridedElementPtr(Location loc, Type elementTypePtr, - ValuePtr descriptor, ArrayRef<ValuePtr> indices, - ArrayRef<int64_t> strides, int64_t offset, - ConversionPatternRewriter &rewriter) const { + Value getStridedElementPtr(Location loc, Type elementTypePtr, + Value descriptor, ArrayRef<Value> indices, + ArrayRef<int64_t> strides, int64_t offset, + ConversionPatternRewriter &rewriter) const { MemRefDescriptor memRefDescriptor(descriptor); - ValuePtr base = memRefDescriptor.alignedPtr(rewriter, loc); - ValuePtr offsetValue = - offset == MemRefType::getDynamicStrideOrOffset() - ? memRefDescriptor.offset(rewriter, loc) - : this->createIndexConstant(rewriter, loc, offset); + Value base = memRefDescriptor.alignedPtr(rewriter, loc); + Value offsetValue = offset == MemRefType::getDynamicStrideOrOffset() + ? memRefDescriptor.offset(rewriter, loc) + : this->createIndexConstant(rewriter, loc, offset); for (int i = 0, e = indices.size(); i < e; ++i) { - ValuePtr stride = - strides[i] == MemRefType::getDynamicStrideOrOffset() - ? memRefDescriptor.stride(rewriter, loc, i) - : this->createIndexConstant(rewriter, loc, strides[i]); - ValuePtr additionalOffset = + Value stride = strides[i] == MemRefType::getDynamicStrideOrOffset() + ? 
memRefDescriptor.stride(rewriter, loc, i) + : this->createIndexConstant(rewriter, loc, strides[i]); + Value additionalOffset = rewriter.create<LLVM::MulOp>(loc, indices[i], stride); offsetValue = rewriter.create<LLVM::AddOp>(loc, offsetValue, additionalOffset); @@ -1435,10 +1429,9 @@ struct LoadStoreOpLowering : public LLVMLegalizationPattern<Derived> { return rewriter.create<LLVM::GEPOp>(loc, elementTypePtr, base, offsetValue); } - ValuePtr getDataPtr(Location loc, MemRefType type, ValuePtr memRefDesc, - ArrayRef<ValuePtr> indices, - ConversionPatternRewriter &rewriter, - llvm::Module &module) const { + Value getDataPtr(Location loc, MemRefType type, Value memRefDesc, + ArrayRef<Value> indices, ConversionPatternRewriter &rewriter, + llvm::Module &module) const { LLVM::LLVMType ptrType = MemRefDescriptor(memRefDesc).getElementType(); int64_t offset; SmallVector<int64_t, 4> strides; @@ -1456,14 +1449,14 @@ struct LoadOpLowering : public LoadStoreOpLowering<LoadOp> { using Base::Base; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loadOp = cast<LoadOp>(op); OperandAdaptor<LoadOp> transformed(operands); auto type = loadOp.getMemRefType(); - ValuePtr dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(), - transformed.indices(), rewriter, getModule()); + Value dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(), + transformed.indices(), rewriter, getModule()); rewriter.replaceOpWithNewOp<LLVM::LoadOp>(op, dataPtr); return matchSuccess(); } @@ -1475,13 +1468,13 @@ struct StoreOpLowering : public LoadStoreOpLowering<StoreOp> { using Base::Base; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto type = cast<StoreOp>(op).getMemRefType(); OperandAdaptor<StoreOp> transformed(operands); - ValuePtr dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(), - transformed.indices(), rewriter, getModule()); + Value dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(), + transformed.indices(), rewriter, getModule()); rewriter.replaceOpWithNewOp<LLVM::StoreOp>(op, transformed.value(), dataPtr); return matchSuccess(); @@ -1494,14 +1487,14 @@ struct PrefetchOpLowering : public LoadStoreOpLowering<PrefetchOp> { using Base::Base; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto prefetchOp = cast<PrefetchOp>(op); OperandAdaptor<PrefetchOp> transformed(operands); auto type = prefetchOp.getMemRefType(); - ValuePtr dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(), - transformed.indices(), rewriter, getModule()); + Value dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(), + transformed.indices(), rewriter, getModule()); // Replace with llvm.prefetch. 
auto llvmI32Type = lowering.convertType(rewriter.getIntegerType(32)); @@ -1529,7 +1522,7 @@ struct IndexCastOpLowering : public LLVMLegalizationPattern<IndexCastOp> { using LLVMLegalizationPattern<IndexCastOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { IndexCastOpOperandAdaptor transformed(operands); auto indexCastOp = cast<IndexCastOp>(op); @@ -1564,7 +1557,7 @@ struct CmpIOpLowering : public LLVMLegalizationPattern<CmpIOp> { using LLVMLegalizationPattern<CmpIOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto cmpiOp = cast<CmpIOp>(op); CmpIOpOperandAdaptor transformed(operands); @@ -1583,7 +1576,7 @@ struct CmpFOpLowering : public LLVMLegalizationPattern<CmpFOp> { using LLVMLegalizationPattern<CmpFOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto cmpfOp = cast<CmpFOp>(op); CmpFOpOperandAdaptor transformed(operands); @@ -1635,9 +1628,9 @@ struct OneToOneLLVMTerminatorLowering using Super = OneToOneLLVMTerminatorLowering<SourceOp, TargetOp>; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> properOperands, + matchAndRewrite(Operation *op, ArrayRef<Value> properOperands, ArrayRef<Block *> destinations, - ArrayRef<ArrayRef<ValuePtr>> operands, + ArrayRef<ArrayRef<Value>> operands, ConversionPatternRewriter &rewriter) const override { SmallVector<ValueRange, 2> operandRanges(operands.begin(), operands.end()); rewriter.replaceOpWithNewOp<TargetOp>(op, properOperands, destinations, @@ -1656,19 +1649,19 @@ struct ReturnOpLowering : public LLVMLegalizationPattern<ReturnOp> { using LLVMLegalizationPattern<ReturnOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { unsigned numArguments = op->getNumOperands(); // If ReturnOp has 0 or 1 operand, create it and return immediately. 
if (numArguments == 0) { rewriter.replaceOpWithNewOp<LLVM::ReturnOp>( - op, ArrayRef<ValuePtr>(), ArrayRef<Block *>(), op->getAttrs()); + op, ArrayRef<Value>(), ArrayRef<Block *>(), op->getAttrs()); return matchSuccess(); } if (numArguments == 1) { rewriter.replaceOpWithNewOp<LLVM::ReturnOp>( - op, ArrayRef<ValuePtr>(operands.front()), ArrayRef<Block *>(), + op, ArrayRef<Value>(operands.front()), ArrayRef<Block *>(), op->getAttrs()); return matchSuccess(); } @@ -1678,7 +1671,7 @@ struct ReturnOpLowering : public LLVMLegalizationPattern<ReturnOp> { auto packedType = lowering.packFunctionResults(llvm::to_vector<4>(op->getOperandTypes())); - ValuePtr packed = rewriter.create<LLVM::UndefOp>(op->getLoc(), packedType); + Value packed = rewriter.create<LLVM::UndefOp>(op->getLoc(), packedType); for (unsigned i = 0; i < numArguments; ++i) { packed = rewriter.create<LLVM::InsertValueOp>( op->getLoc(), packedType, packed, operands[i], @@ -1706,7 +1699,7 @@ struct SplatOpLowering : public LLVMLegalizationPattern<SplatOp> { using LLVMLegalizationPattern<SplatOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto splatOp = cast<SplatOp>(op); VectorType resultType = splatOp.getType().dyn_cast<VectorType>(); @@ -1715,7 +1708,7 @@ struct SplatOpLowering : public LLVMLegalizationPattern<SplatOp> { // First insert it into an undef vector so we can shuffle it. auto vectorType = lowering.convertType(splatOp.getType()); - ValuePtr undef = rewriter.create<LLVM::UndefOp>(op->getLoc(), vectorType); + Value undef = rewriter.create<LLVM::UndefOp>(op->getLoc(), vectorType); auto zero = rewriter.create<LLVM::ConstantOp>( op->getLoc(), lowering.convertType(rewriter.getIntegerType(32)), rewriter.getZeroAttr(rewriter.getIntegerType(32))); @@ -1740,7 +1733,7 @@ struct SplatNdOpLowering : public LLVMLegalizationPattern<SplatOp> { using LLVMLegalizationPattern<SplatOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto splatOp = cast<SplatOp>(op); OperandAdaptor<SplatOp> adaptor(operands); @@ -1757,16 +1750,16 @@ struct SplatNdOpLowering : public LLVMLegalizationPattern<SplatOp> { return matchFailure(); // Construct returned value. - ValuePtr desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayTy); + Value desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayTy); // Construct a 1-D vector with the splatted value that we insert in all the // places within the returned descriptor. - ValuePtr vdesc = rewriter.create<LLVM::UndefOp>(loc, llvmVectorTy); + Value vdesc = rewriter.create<LLVM::UndefOp>(loc, llvmVectorTy); auto zero = rewriter.create<LLVM::ConstantOp>( loc, lowering.convertType(rewriter.getIntegerType(32)), rewriter.getZeroAttr(rewriter.getIntegerType(32))); - ValuePtr v = rewriter.create<LLVM::InsertElementOp>( - loc, llvmVectorTy, vdesc, adaptor.input(), zero); + Value v = rewriter.create<LLVM::InsertElementOp>(loc, llvmVectorTy, vdesc, + adaptor.input(), zero); // Shuffle the value across the desired number of elements. 
int64_t width = resultType.getDimSize(resultType.getRank() - 1); @@ -1794,21 +1787,21 @@ struct SubViewOpLowering : public LLVMLegalizationPattern<SubViewOp> { using LLVMLegalizationPattern<SubViewOp>::LLVMLegalizationPattern; PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto viewOp = cast<SubViewOp>(op); // TODO(b/144779634, ravishankarm) : After Tblgen is adapted to support // having multiple variadic operands where each operand can have different // number of entries, clean all of this up. - SmallVector<ValuePtr, 2> dynamicOffsets( + SmallVector<Value, 2> dynamicOffsets( std::next(operands.begin()), std::next(operands.begin(), 1 + viewOp.getNumOffsets())); - SmallVector<ValuePtr, 2> dynamicSizes( + SmallVector<Value, 2> dynamicSizes( std::next(operands.begin(), 1 + viewOp.getNumOffsets()), std::next(operands.begin(), 1 + viewOp.getNumOffsets() + viewOp.getNumSizes())); - SmallVector<ValuePtr, 2> dynamicStrides( + SmallVector<Value, 2> dynamicStrides( std::next(operands.begin(), 1 + viewOp.getNumOffsets() + viewOp.getNumSizes()), operands.end()); @@ -1845,8 +1838,8 @@ struct SubViewOpLowering : public LLVMLegalizationPattern<SubViewOp> { auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy); // Copy the buffer pointer from the old descriptor to the new one. - ValuePtr extracted = sourceMemRef.allocatedPtr(rewriter, loc); - ValuePtr bitcastPtr = rewriter.create<LLVM::BitcastOp>( + Value extracted = sourceMemRef.allocatedPtr(rewriter, loc); + Value bitcastPtr = rewriter.create<LLVM::BitcastOp>( loc, targetElementTy.getPointerTo(), extracted); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); @@ -1856,7 +1849,7 @@ struct SubViewOpLowering : public LLVMLegalizationPattern<SubViewOp> { targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); // Extract strides needed to compute offset. - SmallVector<ValuePtr, 4> strideValues; + SmallVector<Value, 4> strideValues; strideValues.reserve(viewMemRefType.getRank()); for (int i = 0, e = viewMemRefType.getRank(); i < e; ++i) strideValues.push_back(sourceMemRef.stride(rewriter, loc, i)); @@ -1873,9 +1866,9 @@ struct SubViewOpLowering : public LLVMLegalizationPattern<SubViewOp> { } // Offset. - ValuePtr baseOffset = sourceMemRef.offset(rewriter, loc); + Value baseOffset = sourceMemRef.offset(rewriter, loc); for (int i = 0, e = viewMemRefType.getRank(); i < e; ++i) { - ValuePtr min = dynamicOffsets[i]; + Value min = dynamicOffsets[i]; baseOffset = rewriter.create<LLVM::AddOp>( loc, baseOffset, rewriter.create<LLVM::MulOp>(loc, min, strideValues[i])); @@ -1885,7 +1878,7 @@ struct SubViewOpLowering : public LLVMLegalizationPattern<SubViewOp> { // Update sizes and strides. for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) { targetMemRef.setSize(rewriter, loc, i, dynamicSizes[i]); - ValuePtr newStride; + Value newStride; if (dynamicStrides.empty()) newStride = rewriter.create<LLVM::ConstantOp>( loc, llvmIndexType, rewriter.getI64IntegerAttr(strides[i])); @@ -1910,9 +1903,9 @@ struct ViewOpLowering : public LLVMLegalizationPattern<ViewOp> { // Build and return the value for the idx^th shape dimension, either by // returning the constant shape dimension or counting the proper dynamic size. 
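getSize below returns a static extent directly; for a dynamic dimension it has to pick the matching operand, which (in the part of the function not shown in this hunk) is presumably found by counting the dynamic dimensions that precede idx. A standalone sketch of that convention, with a made-up helper and values (not part of this change):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

constexpr int64_t kDynamic = -1; // stand-in for ShapedType's dynamic marker

// Static extents come from the shape; a dynamic extent is the n-th size
// operand, where n counts the dynamic dimensions before idx.
int64_t dimSize(const std::vector<int64_t> &shape,
                const std::vector<int64_t> &dynamicSizes, unsigned idx) {
  if (shape[idx] != kDynamic)
    return shape[idx];
  auto nDynamic = std::count(shape.begin(), shape.begin() + idx, kDynamic);
  return dynamicSizes[nDynamic];
}

int main() {
  // memref<4x?x8x?xf32> viewed with dynamic sizes {10, 20}.
  std::vector<int64_t> shape = {4, kDynamic, 8, kDynamic};
  assert(dimSize(shape, {10, 20}, 0) == 4);
  assert(dimSize(shape, {10, 20}, 1) == 10);
  assert(dimSize(shape, {10, 20}, 3) == 20);
}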
- ValuePtr getSize(ConversionPatternRewriter &rewriter, Location loc, - ArrayRef<int64_t> shape, ArrayRef<ValuePtr> dynamicSizes, - unsigned idx) const { + Value getSize(ConversionPatternRewriter &rewriter, Location loc, + ArrayRef<int64_t> shape, ArrayRef<Value> dynamicSizes, + unsigned idx) const { assert(idx < shape.size()); if (!ShapedType::isDynamic(shape[idx])) return createIndexConstant(rewriter, loc, shape[idx]); @@ -1927,9 +1920,9 @@ struct ViewOpLowering : public LLVMLegalizationPattern<ViewOp> { // or by computing the dynamic stride from the current `runningStride` and // `nextSize`. The caller should keep a running stride and update it with the // result returned by this function. - ValuePtr getStride(ConversionPatternRewriter &rewriter, Location loc, - ArrayRef<int64_t> strides, ValuePtr nextSize, - ValuePtr runningStride, unsigned idx) const { + Value getStride(ConversionPatternRewriter &rewriter, Location loc, + ArrayRef<int64_t> strides, Value nextSize, + Value runningStride, unsigned idx) const { assert(idx < strides.size()); if (strides[idx] != MemRefType::getDynamicStrideOrOffset()) return createIndexConstant(rewriter, loc, strides[idx]); @@ -1942,7 +1935,7 @@ struct ViewOpLowering : public LLVMLegalizationPattern<ViewOp> { } PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto viewOp = cast<ViewOp>(op); @@ -1969,8 +1962,8 @@ struct ViewOpLowering : public LLVMLegalizationPattern<ViewOp> { auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy); // Field 1: Copy the allocated pointer, used for malloc/free. - ValuePtr extracted = sourceMemRef.allocatedPtr(rewriter, loc); - ValuePtr bitcastPtr = rewriter.create<LLVM::BitcastOp>( + Value extracted = sourceMemRef.allocatedPtr(rewriter, loc); + Value bitcastPtr = rewriter.create<LLVM::BitcastOp>( loc, targetElementTy.getPointerTo(), extracted); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); @@ -1987,10 +1980,10 @@ struct ViewOpLowering : public LLVMLegalizationPattern<ViewOp> { auto sizeAndOffsetOperands = adaptor.operands(); assert(llvm::size(sizeAndOffsetOperands) == numDynamicSizes + (hasDynamicOffset ? 1 : 0)); - ValuePtr baseOffset = !hasDynamicOffset - ? createIndexConstant(rewriter, loc, offset) - // TODO(ntv): better adaptor. - : sizeAndOffsetOperands.front(); + Value baseOffset = !hasDynamicOffset + ? createIndexConstant(rewriter, loc, offset) + // TODO(ntv): better adaptor. + : sizeAndOffsetOperands.front(); targetMemRef.setOffset(rewriter, loc, baseOffset); // Early exit for 0-D corner case. @@ -2001,14 +1994,14 @@ struct ViewOpLowering : public LLVMLegalizationPattern<ViewOp> { if (strides.back() != 1) return op->emitWarning("cannot cast to non-contiguous shape"), matchFailure(); - ValuePtr stride = nullptr, nextSize = nullptr; + Value stride = nullptr, nextSize = nullptr; // Drop the dynamic stride from the operand list, if present. - ArrayRef<ValuePtr> sizeOperands(sizeAndOffsetOperands); + ArrayRef<Value> sizeOperands(sizeAndOffsetOperands); if (hasDynamicOffset) sizeOperands = sizeOperands.drop_front(); for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) { // Update size. - ValuePtr size = + Value size = getSize(rewriter, loc, viewMemRefType.getShape(), sizeOperands, i); targetMemRef.setSize(rewriter, loc, i, size); // Update stride. 
@@ -2052,7 +2045,7 @@ static void ensureDistinctSuccessors(Block &bb) { auto *dummyBlock = new Block(); bb.getParent()->push_back(dummyBlock); auto builder = OpBuilder(dummyBlock); - SmallVector<ValuePtr, 8> operands( + SmallVector<Value, 8> operands( terminator->getSuccessorOperands(*position)); builder.create<BranchOp>(terminator->getLoc(), successor.first, operands); terminator->setSuccessor(dummyBlock, *position); @@ -2173,29 +2166,28 @@ Type LLVMTypeConverter::packFunctionResults(ArrayRef<Type> types) { return LLVM::LLVMType::getStructTy(llvmDialect, resultTypes); } -ValuePtr LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, - ValuePtr operand, - OpBuilder &builder) { +Value LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, Value operand, + OpBuilder &builder) { auto *context = builder.getContext(); auto int64Ty = LLVM::LLVMType::getInt64Ty(getDialect()); auto indexType = IndexType::get(context); // Alloca with proper alignment. We do not expect optimizations of this // alloca op and so we omit allocating at the entry block. auto ptrType = operand->getType().cast<LLVM::LLVMType>().getPointerTo(); - ValuePtr one = builder.create<LLVM::ConstantOp>( - loc, int64Ty, IntegerAttr::get(indexType, 1)); - ValuePtr allocated = + Value one = builder.create<LLVM::ConstantOp>(loc, int64Ty, + IntegerAttr::get(indexType, 1)); + Value allocated = builder.create<LLVM::AllocaOp>(loc, ptrType, one, /*alignment=*/0); // Store into the alloca'ed descriptor. builder.create<LLVM::StoreOp>(loc, operand, allocated); return allocated; } -SmallVector<ValuePtr, 4> +SmallVector<Value, 4> LLVMTypeConverter::promoteMemRefDescriptors(Location loc, ValueRange opOperands, ValueRange operands, OpBuilder &builder) { - SmallVector<ValuePtr, 4> promotedOperands; + SmallVector<Value, 4> promotedOperands; promotedOperands.reserve(operands.size()); for (auto it : llvm::zip(opOperands, operands)) { auto operand = std::get<0>(it); diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp index af1c92ef11d..a02dee4419a 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp @@ -35,7 +35,7 @@ public: using SPIRVOpLowering<ConstantOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(ConstantOp constIndexOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(ConstantOp constIndexOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -45,7 +45,7 @@ public: using SPIRVOpLowering<CmpIOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(CmpIOp cmpIOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(CmpIOp cmpIOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -61,7 +61,7 @@ public: using SPIRVOpLowering<StdOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(StdOp operation, ArrayRef<ValuePtr> operands, + matchAndRewrite(StdOp operation, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto resultType = this->typeConverter.convertType(operation.getResult()->getType()); @@ -80,7 +80,7 @@ public: using SPIRVOpLowering<LoadOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(LoadOp loadOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(LoadOp loadOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -91,7 +91,7 @@ public: using SPIRVOpLowering<ReturnOp>::SPIRVOpLowering; PatternMatchResult - 
matchAndRewrite(ReturnOp returnOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(ReturnOp returnOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -101,7 +101,7 @@ class SelectOpConversion final : public SPIRVOpLowering<SelectOp> { public: using SPIRVOpLowering<SelectOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(SelectOp op, ArrayRef<ValuePtr> operands, + matchAndRewrite(SelectOp op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -114,7 +114,7 @@ public: using SPIRVOpLowering<StoreOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(StoreOp storeOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(StoreOp storeOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; @@ -132,8 +132,7 @@ public: spirv::AccessChainOp getElementPtr(OpBuilder &builder, SPIRVTypeConverter &typeConverter, Location loc, MemRefType origBaseType, - ValuePtr basePtr, - ArrayRef<ValuePtr> indices) { + Value basePtr, ArrayRef<Value> indices) { // Get base and offset of the MemRefType and verify they are static. int64_t offset; SmallVector<int64_t, 4> strides; @@ -144,18 +143,17 @@ spirv::AccessChainOp getElementPtr(OpBuilder &builder, auto indexType = typeConverter.getIndexType(builder.getContext()); - ValuePtr ptrLoc = nullptr; + Value ptrLoc = nullptr; assert(indices.size() == strides.size()); for (auto index : enumerate(indices)) { - ValuePtr strideVal = builder.create<spirv::ConstantOp>( + Value strideVal = builder.create<spirv::ConstantOp>( loc, indexType, IntegerAttr::get(indexType, strides[index.index()])); - ValuePtr update = - builder.create<spirv::IMulOp>(loc, strideVal, index.value()); + Value update = builder.create<spirv::IMulOp>(loc, strideVal, index.value()); ptrLoc = (ptrLoc ? builder.create<spirv::IAddOp>(loc, ptrLoc, update).getResult() : update); } - SmallVector<ValuePtr, 2> linearizedIndices; + SmallVector<Value, 2> linearizedIndices; // Add a '0' at the start to index into the struct. 
linearizedIndices.push_back(builder.create<spirv::ConstantOp>( loc, indexType, IntegerAttr::get(indexType, 0))); @@ -168,7 +166,7 @@ spirv::AccessChainOp getElementPtr(OpBuilder &builder, //===----------------------------------------------------------------------===// PatternMatchResult ConstantIndexOpConversion::matchAndRewrite( - ConstantOp constIndexOp, ArrayRef<ValuePtr> operands, + ConstantOp constIndexOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { if (!constIndexOp.getResult()->getType().isa<IndexType>()) { return matchFailure(); @@ -202,7 +200,7 @@ PatternMatchResult ConstantIndexOpConversion::matchAndRewrite( //===----------------------------------------------------------------------===// PatternMatchResult -CmpIOpConversion::matchAndRewrite(CmpIOp cmpIOp, ArrayRef<ValuePtr> operands, +CmpIOpConversion::matchAndRewrite(CmpIOp cmpIOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { CmpIOpOperandAdaptor cmpIOpOperands(operands); @@ -234,7 +232,7 @@ CmpIOpConversion::matchAndRewrite(CmpIOp cmpIOp, ArrayRef<ValuePtr> operands, //===----------------------------------------------------------------------===// PatternMatchResult -LoadOpConversion::matchAndRewrite(LoadOp loadOp, ArrayRef<ValuePtr> operands, +LoadOpConversion::matchAndRewrite(LoadOp loadOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { LoadOpOperandAdaptor loadOperands(operands); auto loadPtr = getElementPtr(rewriter, typeConverter, loadOp.getLoc(), @@ -251,8 +249,7 @@ LoadOpConversion::matchAndRewrite(LoadOp loadOp, ArrayRef<ValuePtr> operands, //===----------------------------------------------------------------------===// PatternMatchResult -ReturnOpConversion::matchAndRewrite(ReturnOp returnOp, - ArrayRef<ValuePtr> operands, +ReturnOpConversion::matchAndRewrite(ReturnOp returnOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { if (returnOp.getNumOperands()) { return matchFailure(); @@ -266,7 +263,7 @@ ReturnOpConversion::matchAndRewrite(ReturnOp returnOp, //===----------------------------------------------------------------------===// PatternMatchResult -SelectOpConversion::matchAndRewrite(SelectOp op, ArrayRef<ValuePtr> operands, +SelectOpConversion::matchAndRewrite(SelectOp op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { SelectOpOperandAdaptor selectOperands(operands); rewriter.replaceOpWithNewOp<spirv::SelectOp>(op, selectOperands.condition(), @@ -280,7 +277,7 @@ SelectOpConversion::matchAndRewrite(SelectOp op, ArrayRef<ValuePtr> operands, //===----------------------------------------------------------------------===// PatternMatchResult -StoreOpConversion::matchAndRewrite(StoreOp storeOp, ArrayRef<ValuePtr> operands, +StoreOpConversion::matchAndRewrite(StoreOp storeOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { StoreOpOperandAdaptor storeOperands(operands); auto storePtr = diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.cpp index c3937358c47..52456b6e46d 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.cpp @@ -28,7 +28,7 @@ public: using SPIRVOpLowering<FuncOp>::SPIRVOpLowering; PatternMatchResult - matchAndRewrite(FuncOp funcOp, ArrayRef<ValuePtr> operands, + matchAndRewrite(FuncOp funcOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override; }; 
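The getElementPtr helper in ConvertStandardToSPIRV.cpp, used by the load/store conversions above, reduces a multi-dimensional memref access to a single element offset before emitting a spirv::AccessChainOp. Below is a minimal standalone C++ sketch of that linearization without the MLIR builder API; linearize is an illustrative name, not the pattern's own helper.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Scalar model of the index linearization: each memref index is scaled by its
// static stride and the products are summed into one element offset (ptrLoc).
int64_t linearize(const std::vector<int64_t> &strides,
                  const std::vector<int64_t> &indices) {
  assert(strides.size() == indices.size());
  int64_t ptrLoc = 0;
  for (size_t i = 0; i < indices.size(); ++i)
    ptrLoc += strides[i] * indices[i];
  return ptrLoc;
}

int main() {
  // For a 4x8 memref with strides [8, 1] and indices (2, 5) the access chain
  // indices are {0, 21}: a leading 0 to step into the wrapping struct,
  // followed by the linearized element offset.
  std::printf("{0, %lld}\n", static_cast<long long>(linearize({8, 1}, {2, 5})));
  return 0;
}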
@@ -40,7 +40,7 @@ class ConvertStandardToSPIRVPass } // namespace PatternMatchResult -FuncOpConversion::matchAndRewrite(FuncOp funcOp, ArrayRef<ValuePtr> operands, +FuncOpConversion::matchAndRewrite(FuncOp funcOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const { auto fnType = funcOp.getType(); if (fnType.getNumResults()) { diff --git a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp index 5d693336c3f..a658356f76c 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp @@ -60,7 +60,7 @@ public: static LogicalResult resolveSourceIndices(Location loc, PatternRewriter &rewriter, SubViewOp subViewOp, ValueRange indices, - SmallVectorImpl<ValuePtr> &sourceIndices) { + SmallVectorImpl<Value> &sourceIndices) { // TODO: Aborting when the offsets are static. There might be a way to fold // the subview op with load even if the offsets have been canonicalized // away. @@ -68,7 +68,7 @@ resolveSourceIndices(Location loc, PatternRewriter &rewriter, return failure(); ValueRange opOffsets = subViewOp.offsets(); - SmallVector<ValuePtr, 2> opStrides; + SmallVector<Value, 2> opStrides; if (subViewOp.getNumStrides()) { // If the strides are dynamic, get the stride operands. opStrides = llvm::to_vector<2>(subViewOp.strides()); @@ -115,7 +115,7 @@ LoadOpOfSubViewFolder::matchAndRewrite(LoadOp loadOp, if (!subViewOp) { return matchFailure(); } - SmallVector<ValuePtr, 4> sourceIndices; + SmallVector<Value, 4> sourceIndices; if (failed(resolveSourceIndices(loadOp.getLoc(), rewriter, subViewOp, loadOp.indices(), sourceIndices))) return matchFailure(); @@ -137,7 +137,7 @@ StoreOpOfSubViewFolder::matchAndRewrite(StoreOp storeOp, if (!subViewOp) { return matchFailure(); } - SmallVector<ValuePtr, 4> sourceIndices; + SmallVector<Value, 4> sourceIndices; if (failed(resolveSourceIndices(storeOp.getLoc(), rewriter, subViewOp, storeOp.indices(), sourceIndices))) return matchFailure(); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 56005220d3f..b48930c4dda 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -53,10 +53,9 @@ static VectorType reducedVectorTypeBack(VectorType tp) { } // Helper that picks the proper sequence for inserting. -static ValuePtr insertOne(ConversionPatternRewriter &rewriter, - LLVMTypeConverter &lowering, Location loc, - ValuePtr val1, ValuePtr val2, Type llvmType, - int64_t rank, int64_t pos) { +static Value insertOne(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &lowering, Location loc, Value val1, + Value val2, Type llvmType, int64_t rank, int64_t pos) { if (rank == 1) { auto idxType = rewriter.getIndexType(); auto constant = rewriter.create<LLVM::ConstantOp>( @@ -70,10 +69,9 @@ static ValuePtr insertOne(ConversionPatternRewriter &rewriter, } // Helper that picks the proper sequence for extracting. 
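resolveSourceIndices in LegalizeStandardForSPIRV.cpp (above) rewrites the indices of a load/store on a subview into indices on the source memref. The following C++ sketch shows the assumed per-dimension arithmetic; foldSubViewIndices is an illustrative name, and the exact combination of offsets, strides and indices is outside the visible hunk, so treat this as a sketch rather than the pattern's actual code.

#include <cassert>
#include <cstdint>
#include <vector>

// Assumed folding of a load/store through a subview: the index on the subview
// is rescaled by the subview stride (1 when no stride operands are present)
// and shifted by the subview offset to obtain the index on the source memref.
std::vector<int64_t> foldSubViewIndices(const std::vector<int64_t> &offsets,
                                        const std::vector<int64_t> &strides,
                                        const std::vector<int64_t> &indices) {
  assert(offsets.size() == indices.size());
  std::vector<int64_t> sourceIndices(indices.size());
  for (size_t i = 0; i < indices.size(); ++i) {
    int64_t stride = strides.empty() ? 1 : strides[i];
    sourceIndices[i] = offsets[i] + stride * indices[i];
  }
  return sourceIndices;
}

int main() {
  // subview offsets (2, 3), unit strides, load indices (1, 4) -> source (3, 7).
  std::vector<int64_t> src = foldSubViewIndices({2, 3}, {}, {1, 4});
  return (src[0] == 3 && src[1] == 7) ? 0 : 1;
}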
-static ValuePtr extractOne(ConversionPatternRewriter &rewriter, - LLVMTypeConverter &lowering, Location loc, - ValuePtr val, Type llvmType, int64_t rank, - int64_t pos) { +static Value extractOne(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &lowering, Location loc, Value val, + Type llvmType, int64_t rank, int64_t pos) { if (rank == 1) { auto idxType = rewriter.getIndexType(); auto constant = rewriter.create<LLVM::ConstantOp>( @@ -94,7 +92,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto broadcastOp = cast<vector::BroadcastOp>(op); VectorType dstVectorType = broadcastOp.getVectorType(); @@ -122,9 +120,9 @@ private: // ops once all insert/extract/shuffle operations // are available with lowering implemention. // - ValuePtr expandRanks(ValuePtr value, Location loc, VectorType srcVectorType, - VectorType dstVectorType, - ConversionPatternRewriter &rewriter) const { + Value expandRanks(Value value, Location loc, VectorType srcVectorType, + VectorType dstVectorType, + ConversionPatternRewriter &rewriter) const { assert((dstVectorType != nullptr) && "invalid result type in broadcast"); // Determine rank of source and destination. int64_t srcRank = srcVectorType ? srcVectorType.getRank() : 0; @@ -161,24 +159,22 @@ private: // becomes: // x = [s,s] // v = [x,x,x,x] - ValuePtr duplicateOneRank(ValuePtr value, Location loc, - VectorType srcVectorType, VectorType dstVectorType, - int64_t rank, int64_t dim, - ConversionPatternRewriter &rewriter) const { + Value duplicateOneRank(Value value, Location loc, VectorType srcVectorType, + VectorType dstVectorType, int64_t rank, int64_t dim, + ConversionPatternRewriter &rewriter) const { Type llvmType = lowering.convertType(dstVectorType); assert((llvmType != nullptr) && "unlowerable vector type"); if (rank == 1) { - ValuePtr undef = rewriter.create<LLVM::UndefOp>(loc, llvmType); - ValuePtr expand = + Value undef = rewriter.create<LLVM::UndefOp>(loc, llvmType); + Value expand = insertOne(rewriter, lowering, loc, undef, value, llvmType, rank, 0); SmallVector<int32_t, 4> zeroValues(dim, 0); return rewriter.create<LLVM::ShuffleVectorOp>( loc, expand, undef, rewriter.getI32ArrayAttr(zeroValues)); } - ValuePtr expand = - expandRanks(value, loc, srcVectorType, - reducedVectorTypeFront(dstVectorType), rewriter); - ValuePtr result = rewriter.create<LLVM::UndefOp>(loc, llvmType); + Value expand = expandRanks(value, loc, srcVectorType, + reducedVectorTypeFront(dstVectorType), rewriter); + Value result = rewriter.create<LLVM::UndefOp>(loc, llvmType); for (int64_t d = 0; d < dim; ++d) { result = insertOne(rewriter, lowering, loc, result, expand, llvmType, rank, d); @@ -203,20 +199,19 @@ private: // y = broadcast w[1][0] : vector<2xf32> to vector <2x2xf32> // a = [x, y] // etc. 
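The duplicateOneRank helper above realizes a broadcast by splatting at rank 1 (insert into undef, then shufflevector with an all-zero mask) and by repeated insertion at higher ranks, exactly as its x = [s,s] / v = [x,x,x,x] comment describes. A scalar C++ model of that shape follows; splatRank1 and splatRank2 are illustrative names and std::vector stands in for LLVM vectors.

#include <cstdint>
#include <cstdio>
#include <vector>

using Vec1D = std::vector<float>;
using Vec2D = std::vector<Vec1D>;

// Rank 1: splat the scalar across the innermost vector, x = [s, s, ...].
Vec1D splatRank1(float s, int64_t dim) { return Vec1D(dim, s); }

// Higher rank: expand into the reduced (inner) type once, then insert that
// row dim times, v = [x, x, ..., x].
Vec2D splatRank2(float s, int64_t outerDim, int64_t innerDim) {
  Vec1D x = splatRank1(s, innerDim);
  return Vec2D(outerDim, x);
}

int main() {
  Vec2D v = splatRank2(3.0f, 4, 2); // broadcast a scalar to a 4x2 vector
  std::printf("%zu x %zu, v[3][1] = %g\n", v.size(), v[0].size(), v[3][1]);
  return 0;
}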
- ValuePtr stretchOneRank(ValuePtr value, Location loc, - VectorType srcVectorType, VectorType dstVectorType, - int64_t rank, int64_t dim, - ConversionPatternRewriter &rewriter) const { + Value stretchOneRank(Value value, Location loc, VectorType srcVectorType, + VectorType dstVectorType, int64_t rank, int64_t dim, + ConversionPatternRewriter &rewriter) const { Type llvmType = lowering.convertType(dstVectorType); assert((llvmType != nullptr) && "unlowerable vector type"); - ValuePtr result = rewriter.create<LLVM::UndefOp>(loc, llvmType); + Value result = rewriter.create<LLVM::UndefOp>(loc, llvmType); bool atStretch = dim != srcVectorType.getDimSize(0); if (rank == 1) { assert(atStretch); Type redLlvmType = lowering.convertType(dstVectorType.getElementType()); - ValuePtr one = + Value one = extractOne(rewriter, lowering, loc, value, redLlvmType, rank, 0); - ValuePtr expand = + Value expand = insertOne(rewriter, lowering, loc, result, one, llvmType, rank, 0); SmallVector<int32_t, 4> zeroValues(dim, 0); return rewriter.create<LLVM::ShuffleVectorOp>( @@ -227,9 +222,9 @@ private: Type redLlvmType = lowering.convertType(redSrcType); for (int64_t d = 0; d < dim; ++d) { int64_t pos = atStretch ? 0 : d; - ValuePtr one = + Value one = extractOne(rewriter, lowering, loc, value, redLlvmType, rank, pos); - ValuePtr expand = expandRanks(one, loc, redSrcType, redDstType, rewriter); + Value expand = expandRanks(one, loc, redSrcType, redDstType, rewriter); result = insertOne(rewriter, lowering, loc, result, expand, llvmType, rank, d); } @@ -245,7 +240,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto adaptor = vector::ShuffleOpOperandAdaptor(operands); @@ -269,23 +264,23 @@ public: // For rank 1, where both operands have *exactly* the same vector type, // there is direct shuffle support in LLVM. Use it! if (rank == 1 && v1Type == v2Type) { - ValuePtr shuffle = rewriter.create<LLVM::ShuffleVectorOp>( + Value shuffle = rewriter.create<LLVM::ShuffleVectorOp>( loc, adaptor.v1(), adaptor.v2(), maskArrayAttr); rewriter.replaceOp(op, shuffle); return matchSuccess(); } // For all other cases, insert the individual values individually. 
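stretchOneRank above handles the case where a source dimension of size 1 must be stretched to the destination size. The sketch below is a simplified two-rank C++ model; stretchOuter is an illustrative name, and the inner expansion is reduced to the size-1 case, whereas the real expandRanks recurses over all remaining ranks.

#include <cstdint>
#include <vector>

using Vec1D = std::vector<float>;
using Vec2D = std::vector<Vec1D>;

// When the leading source dimension is 1 ("atStretch"), every destination row
// is expanded from source row 0; otherwise row d is expanded from source row d.
Vec2D stretchOuter(const Vec2D &src, int64_t dstOuter, int64_t dstInner) {
  bool atStretch = dstOuter != static_cast<int64_t>(src.size());
  Vec2D result(dstOuter);
  for (int64_t d = 0; d < dstOuter; ++d) {
    const Vec1D &one = src[atStretch ? 0 : d];       // extractOne
    Vec1D expanded = one;
    if (static_cast<int64_t>(expanded.size()) != dstInner)
      expanded.assign(dstInner, one[0]);             // stretch the inner dim
    result[d] = expanded;                            // insertOne
  }
  return result;
}

int main() {
  Vec2D src = {{7.0f}};                 // a 1x1 source
  Vec2D dst = stretchOuter(src, 3, 2);  // broadcast to a 3x2 destination
  return (dst[2][1] == 7.0f) ? 0 : 1;
}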
- ValuePtr insert = rewriter.create<LLVM::UndefOp>(loc, llvmType); + Value insert = rewriter.create<LLVM::UndefOp>(loc, llvmType); int64_t insPos = 0; for (auto en : llvm::enumerate(maskArrayAttr)) { int64_t extPos = en.value().cast<IntegerAttr>().getInt(); - ValuePtr value = adaptor.v1(); + Value value = adaptor.v1(); if (extPos >= v1Dim) { extPos -= v1Dim; value = adaptor.v2(); } - ValuePtr extract = + Value extract = extractOne(rewriter, lowering, loc, value, llvmType, rank, extPos); insert = insertOne(rewriter, lowering, loc, insert, extract, llvmType, rank, insPos++); @@ -303,7 +298,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto adaptor = vector::ExtractElementOpOperandAdaptor(operands); auto extractEltOp = cast<vector::ExtractElementOp>(op); @@ -328,7 +323,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto adaptor = vector::ExtractOpOperandAdaptor(operands); @@ -344,7 +339,7 @@ public: // One-shot extraction of vector from array (only requires extractvalue). if (resultType.isa<VectorType>()) { - ValuePtr extracted = rewriter.create<LLVM::ExtractValueOp>( + Value extracted = rewriter.create<LLVM::ExtractValueOp>( loc, llvmResultType, adaptor.vector(), positionArrayAttr); rewriter.replaceOp(op, extracted); return matchSuccess(); @@ -352,7 +347,7 @@ public: // Potential extraction of 1-D vector from array. auto *context = op->getContext(); - ValuePtr extracted = adaptor.vector(); + Value extracted = adaptor.vector(); auto positionAttrs = positionArrayAttr.getValue(); if (positionAttrs.size() > 1) { auto oneDVectorType = reducedVectorTypeBack(vectorType); @@ -383,7 +378,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto adaptor = vector::InsertElementOpOperandAdaptor(operands); auto insertEltOp = cast<vector::InsertElementOp>(op); @@ -408,7 +403,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto adaptor = vector::InsertOpOperandAdaptor(operands); @@ -424,7 +419,7 @@ public: // One-shot insertion of a vector into an array (only requires insertvalue). if (sourceType.isa<VectorType>()) { - ValuePtr inserted = rewriter.create<LLVM::InsertValueOp>( + Value inserted = rewriter.create<LLVM::InsertValueOp>( loc, llvmResultType, adaptor.dest(), adaptor.source(), positionArrayAttr); rewriter.replaceOp(op, inserted); @@ -433,7 +428,7 @@ public: // Potential extraction of 1-D vector from array. auto *context = op->getContext(); - ValuePtr extracted = adaptor.dest(); + Value extracted = adaptor.dest(); auto positionAttrs = positionArrayAttr.getValue(); auto position = positionAttrs.back().cast<IntegerAttr>(); auto oneDVectorType = destVectorType; @@ -449,7 +444,7 @@ public: // Insertion of an element into a 1-D LLVM vector. 
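The generic vector.shuffle lowering above extracts each element named by the mask, taking it from v1 when the position is below v1's length and from v2 (after re-basing) otherwise, and inserts it at the next destination slot. A scalar C++ model of that selection; shuffle here is an illustrative stand-in operating on std::vector rather than LLVM vectors.

#include <cstdint>
#include <vector>

std::vector<float> shuffle(const std::vector<float> &v1,
                           const std::vector<float> &v2,
                           const std::vector<int64_t> &mask) {
  const int64_t v1Dim = static_cast<int64_t>(v1.size());
  std::vector<float> result;
  result.reserve(mask.size());
  for (int64_t extPos : mask) {
    const std::vector<float> &value = (extPos < v1Dim) ? v1 : v2;
    if (extPos >= v1Dim)
      extPos -= v1Dim;                 // re-base positions that refer to v2
    result.push_back(value[extPos]);   // extractOne, then insertOne at insPos++
  }
  return result;
}

int main() {
  std::vector<float> r =
      shuffle({1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}, {4, 0, 2}); // -> {5, 1, 3}
  return (r[0] == 5.0f && r[1] == 1.0f && r[2] == 3.0f) ? 0 : 1;
}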
auto i64Type = LLVM::LLVMType::getInt64Ty(lowering.getDialect()); auto constant = rewriter.create<LLVM::ConstantOp>(loc, i64Type, position); - ValuePtr inserted = rewriter.create<LLVM::InsertElementOp>( + Value inserted = rewriter.create<LLVM::InsertElementOp>( loc, lowering.convertType(oneDVectorType), extracted, adaptor.source(), constant); @@ -475,7 +470,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto adaptor = vector::OuterProductOpOperandAdaptor(operands); @@ -486,10 +481,10 @@ public: auto rankRHS = vRHS.getUnderlyingType()->getVectorNumElements(); auto llvmArrayOfVectType = lowering.convertType( cast<vector::OuterProductOp>(op).getResult()->getType()); - ValuePtr desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayOfVectType); - ValuePtr a = adaptor.lhs(), b = adaptor.rhs(); - ValuePtr acc = adaptor.acc().empty() ? nullptr : adaptor.acc().front(); - SmallVector<ValuePtr, 8> lhs, accs; + Value desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayOfVectType); + Value a = adaptor.lhs(), b = adaptor.rhs(); + Value acc = adaptor.acc().empty() ? nullptr : adaptor.acc().front(); + SmallVector<Value, 8> lhs, accs; lhs.reserve(rankLHS); accs.reserve(rankLHS); for (unsigned d = 0, e = rankLHS; d < e; ++d) { @@ -497,7 +492,7 @@ public: auto attr = rewriter.getI32IntegerAttr(d); SmallVector<Attribute, 4> bcastAttr(rankRHS, attr); auto bcastArrayAttr = ArrayAttr::get(bcastAttr, ctx); - ValuePtr aD = nullptr, accD = nullptr; + Value aD = nullptr, accD = nullptr; // 1. Broadcast the element a[d] into vector aD. aD = rewriter.create<LLVM::ShuffleVectorOp>(loc, a, a, bcastArrayAttr); // 2. If acc is present, extract 1-d vector acc[d] into accD. @@ -505,7 +500,7 @@ public: accD = rewriter.create<LLVM::ExtractValueOp>( loc, vRHS, acc, rewriter.getI64ArrayAttr(d)); // 3. Compute aD outer b (plus accD, if relevant). - ValuePtr aOuterbD = + Value aOuterbD = accD ? rewriter.create<LLVM::FMulAddOp>(loc, vRHS, aD, b, accD) .getResult() : rewriter.create<LLVM::FMulOp>(loc, aD, b).getResult(); @@ -527,7 +522,7 @@ public: typeConverter) {} PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); vector::TypeCastOp castOp = cast<vector::TypeCastOp>(op); @@ -576,12 +571,12 @@ public: auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy); Type llvmTargetElementTy = desc.getElementType(); // Set allocated ptr. - ValuePtr allocated = sourceMemRef.allocatedPtr(rewriter, loc); + Value allocated = sourceMemRef.allocatedPtr(rewriter, loc); allocated = rewriter.create<LLVM::BitcastOp>(loc, llvmTargetElementTy, allocated); desc.setAllocatedPtr(rewriter, loc, allocated); // Set aligned ptr. - ValuePtr ptr = sourceMemRef.alignedPtr(rewriter, loc); + Value ptr = sourceMemRef.alignedPtr(rewriter, loc); ptr = rewriter.create<LLVM::BitcastOp>(loc, llvmTargetElementTy, ptr); desc.setAlignedPtr(rewriter, loc, ptr); // Fill offset 0. @@ -627,7 +622,7 @@ public: // TODO(ajcbik): rely solely on libc in future? something else? 
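The vector.outerproduct lowering above broadcasts a[d], multiplies it with b, and fuses in acc[d] when an accumulator operand is present. A scalar C++ sketch of that arithmetic; outerProduct is an illustrative name and the shuffle/insertvalue plumbing is not modeled.

#include <cstddef>
#include <vector>

using Vec1D = std::vector<float>;
using Vec2D = std::vector<Vec1D>;

// Row d of the result is the broadcast element a[d] times b, fused with
// acc[d] when an accumulator is given (the FMulAddOp vs. FMulOp choice).
Vec2D outerProduct(const Vec1D &a, const Vec1D &b, const Vec2D *acc) {
  Vec2D result(a.size(), Vec1D(b.size()));
  for (size_t d = 0; d < a.size(); ++d)         // aD = broadcast a[d]
    for (size_t j = 0; j < b.size(); ++j)
      result[d][j] = acc ? a[d] * b[j] + (*acc)[d][j] // fmuladd(aD, b, accD)
                         : a[d] * b[j];               // fmul(aD, b)
  return result;
}

int main() {
  Vec2D r = outerProduct({1.0f, 2.0f}, {3.0f, 4.0f}, /*acc=*/nullptr);
  return (r[1][0] == 6.0f) ? 0 : 1;
}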
// PatternMatchResult - matchAndRewrite(Operation *op, ArrayRef<ValuePtr> operands, + matchAndRewrite(Operation *op, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { auto printOp = cast<vector::PrintOp>(op); auto adaptor = vector::PrintOpOperandAdaptor(operands); @@ -657,7 +652,7 @@ public: private: void emitRanks(ConversionPatternRewriter &rewriter, Operation *op, - ValuePtr value, VectorType vectorType, Operation *printer, + Value value, VectorType vectorType, Operation *printer, int64_t rank) const { Location loc = op->getLoc(); if (rank == 0) { @@ -673,7 +668,7 @@ private: rank > 1 ? reducedVectorTypeFront(vectorType) : nullptr; auto llvmType = lowering.convertType( rank > 1 ? reducedType : vectorType.getElementType()); - ValuePtr nestedVal = + Value nestedVal = extractOne(rewriter, lowering, loc, value, llvmType, rank, d); emitRanks(rewriter, op, nestedVal, reducedType, printer, rank - 1); if (d != dim - 1) |
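The vector.print lowering ends with the recursive emitRanks helper shown above. The standalone C++ model below illustrates the recursion; the bracket and comma formatting is an assumption, since the concrete runtime print calls are outside the visible hunk.

#include <cstdio>
#include <vector>

using Vec1D = std::vector<float>;
using Vec2D = std::vector<Vec1D>;

// Rank 0: print the element itself.
void emitRanks(float scalar) { std::printf("%g", scalar); }

// Rank 1: open a bracket, recurse on each element, separate with commas.
void emitRanks(const Vec1D &v) {
  std::printf("( ");
  for (size_t d = 0; d < v.size(); ++d) {
    emitRanks(v[d]);
    if (d != v.size() - 1)
      std::printf(", ");
  }
  std::printf(" )");
}

// Rank 2: same recursion, one level deeper (rank - 1 per extracted value).
void emitRanks(const Vec2D &v) {
  std::printf("( ");
  for (size_t d = 0; d < v.size(); ++d) {
    emitRanks(v[d]);
    if (d != v.size() - 1)
      std::printf(", ");
  }
  std::printf(" )");
}

int main() {
  emitRanks(Vec2D{{1.0f, 2.0f}, {3.0f, 4.0f}}); // prints ( ( 1, 2 ), ( 3, 4 ) )
  std::printf("\n");
  return 0;
}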