diff options
| author | Nicolas Vasilache <ntv@google.com> | 2019-07-16 01:46:23 -0700 |
|---|---|---|
| committer | Mehdi Amini <joker.eph@gmail.com> | 2019-07-16 13:44:57 -0700 |
| commit | e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07 (patch) | |
| tree | bdd70056fd618fce4a0b44f34bd905c0f639c005 /mlir/lib/Conversion/LoopsToGPU | |
| parent | dec1942cdf664347e85e668ee910694c15cf1855 (diff) | |
| download | bcm5719-llvm-e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07.tar.gz bcm5719-llvm-e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07.zip | |
Replace linalg.for by loop.for
With the introduction of the Loop dialect, uses of the `linalg.for` operation can now be subsumed 1-to-1 by `loop.for`.
This CL performs the replacement and tests are updated accordingly.
PiperOrigin-RevId: 258322565
Diffstat (limited to 'mlir/lib/Conversion/LoopsToGPU')
| -rw-r--r-- | mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp | 59 | ||||
| -rw-r--r-- | mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp | 9 |
2 files changed, 37 insertions, 31 deletions
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp index 96ac947a1e0..ac164ab816f 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -23,10 +23,10 @@ #include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h" #include "mlir/AffineOps/AffineOps.h" +#include "mlir/Dialect/LoopOps/LoopOps.h" #include "mlir/GPU/GPUDialect.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/Builders.h" -#include "mlir/Linalg/IR/LinalgOps.h" #include "mlir/StandardOps/Ops.h" #include "mlir/Transforms/LowerAffine.h" #include "mlir/Transforms/RegionUtils.h" @@ -36,6 +36,7 @@ #define DEBUG_TYPE "loops-to-gpu" using namespace mlir; +using namespace mlir::loop; // Extract an indexed value from KernelDim3. static Value *getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) { @@ -56,8 +57,8 @@ static Value *getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) { static Operation::operand_range getLowerBoundOperands(AffineForOp forOp) { return forOp.getLowerBoundOperands(); } -static SmallVector<Value *, 1> getLowerBoundOperands(linalg::ForOp forOp) { - SmallVector<Value *, 1> bounds(1, forOp.getLowerBound()); +static SmallVector<Value *, 1> getLowerBoundOperands(ForOp forOp) { + SmallVector<Value *, 1> bounds(1, forOp.lowerBound()); return bounds; } @@ -65,8 +66,8 @@ static SmallVector<Value *, 1> getLowerBoundOperands(linalg::ForOp forOp) { static Operation::operand_range getUpperBoundOperands(AffineForOp forOp) { return forOp.getUpperBoundOperands(); } -static SmallVector<Value *, 1> getUpperBoundOperands(linalg::ForOp forOp) { - SmallVector<Value *, 1> bounds(1, forOp.getUpperBound()); +static SmallVector<Value *, 1> getUpperBoundOperands(ForOp forOp) { + SmallVector<Value *, 1> bounds(1, forOp.upperBound()); return bounds; } @@ -75,17 +76,15 @@ static SmallVector<Value *, 1> getUpperBoundOperands(linalg::ForOp forOp) { static Value *getOrCreateStep(AffineForOp forOp, OpBuilder 
&builder) { return builder.create<ConstantIndexOp>(forOp.getLoc(), forOp.getStep()); } -static Value *getOrCreateStep(linalg::ForOp forOp, OpBuilder &) { - return forOp.getStep(); -} +static Value *getOrCreateStep(ForOp forOp, OpBuilder &) { return forOp.step(); } // Get a Value for the loop lower bound. If the value requires computation, // materialize the instructions using builder. static Value *getOrEmitLowerBound(AffineForOp forOp, OpBuilder &builder) { return lowerAffineLowerBound(forOp, builder); } -static Value *getOrEmitLowerBound(linalg::ForOp forOp, OpBuilder &) { - return forOp.getLowerBound(); +static Value *getOrEmitLowerBound(ForOp forOp, OpBuilder &) { + return forOp.lowerBound(); } // Get a Value for the loop upper bound. If the value requires computation, @@ -93,10 +92,16 @@ static Value *getOrEmitLowerBound(linalg::ForOp forOp, OpBuilder &) { static Value *getOrEmitUpperBound(AffineForOp forOp, OpBuilder &builder) { return lowerAffineUpperBound(forOp, builder); } -static Value *getOrEmitUpperBound(linalg::ForOp forOp, OpBuilder &) { - return forOp.getUpperBound(); +static Value *getOrEmitUpperBound(ForOp forOp, OpBuilder &) { + return forOp.upperBound(); } +// TODO(ntv): uniformize back once AffineForOp is in ODS. 
+static Region &getRegion(ForOp op) { return op.region(); } +static Region &getRegion(AffineForOp op) { return op.getRegion(); } +static Block *getBody(ForOp op) { return op.body(); } +static Block *getBody(AffineForOp op) { return op.getBody(); } + // Check the structure of the loop nest: // - there are enough loops to map to numBlockDims + numThreadDims; // - the loops are perfectly nested; @@ -122,9 +127,9 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, } OpTy currentLoop = forOp; - Region &limit = forOp.getRegion(); + Region &limit = getRegion(forOp); for (unsigned i = 0, e = numBlockDims + numThreadDims; i < e; ++i) { - Operation *nested = &currentLoop.getBody()->front(); + Operation *nested = &getBody(currentLoop)->front(); if (!areValuesDefinedAbove(getLowerBoundOperands(currentLoop), limit) || !areValuesDefinedAbove(getUpperBoundOperands(currentLoop), limit)) return currentLoop.emitError( @@ -136,9 +141,9 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, if (i == e - 1) break; - auto begin = currentLoop.getBody()->begin(), - end = currentLoop.getBody()->end(); - if (currentLoop.getBody()->empty() || std::next(begin, 2) != end) + auto begin = getBody(currentLoop)->begin(), + end = getBody(currentLoop)->end(); + if (getBody(currentLoop)->empty() || std::next(begin, 2) != end) return currentLoop.emitError( "expected perfectly nested loops in the body"); @@ -211,7 +216,7 @@ Optional<OpTy> LoopToGpuConverter::collectBounds(OpTy forOp, steps.push_back(step); if (i != numLoops - 1) - currentLoop = cast<OpTy>(&currentLoop.getBody()->front()); + currentLoop = cast<OpTy>(&getBody(currentLoop)->front()); } return currentLoop; } @@ -243,7 +248,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, // Still assuming perfect nesting so there are no values other than induction // variables that are defined in one loop and used in deeper loops. 
llvm::SetVector<Value *> valuesToForwardSet; - getUsedValuesDefinedAbove(innermostForOp.getRegion(), rootForOp.getRegion(), + getUsedValuesDefinedAbove(getRegion(innermostForOp), getRegion(rootForOp), valuesToForwardSet); auto valuesToForward = valuesToForwardSet.takeVector(); auto originallyForwardedValues = valuesToForward.size(); @@ -258,14 +263,14 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, // gpu return and move the operations from the loop body block to the gpu // launch body block. Do not move the entire block because of the difference // in block arguments. - Operation &terminator = innermostForOp.getBody()->back(); + Operation &terminator = getBody(innermostForOp)->back(); Location terminatorLoc = terminator.getLoc(); terminator.erase(); - builder.setInsertionPointToEnd(innermostForOp.getBody()); + builder.setInsertionPointToEnd(getBody(innermostForOp)); builder.create<gpu::Return>(terminatorLoc); launchOp.getBody().front().getOperations().splice( launchOp.getBody().front().begin(), - innermostForOp.getBody()->getOperations()); + getBody(innermostForOp)->getOperations()); // Remap the loop iterators to use block/thread identifiers instead. 
Loops // may iterate from LB with step S whereas GPU thread/block ids always iterate @@ -328,11 +333,11 @@ static LogicalResult convertLoopNestToGPULaunch(OpTy forOp, LogicalResult mlir::convertAffineLoopNestToGPULaunch(AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims) { - return convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); + return ::convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); } -LogicalResult mlir::convertLinalgLoopNestToGPULaunch(linalg::ForOp forOp, - unsigned numBlockDims, - unsigned numThreadDims) { - return convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); +LogicalResult mlir::convertLoopNestToGPULaunch(ForOp forOp, + unsigned numBlockDims, + unsigned numThreadDims) { + return ::convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); } diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp index 13e4171033e..7c785b5c995 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp @@ -18,7 +18,7 @@ #include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h" #include "mlir/AffineOps/AffineOps.h" #include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h" -#include "mlir/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/LoopOps/LoopOps.h" #include "mlir/Pass/Pass.h" #include "llvm/Support/CommandLine.h" @@ -26,6 +26,7 @@ #define PASS_NAME "convert-loops-to-gpu" using namespace mlir; +using namespace mlir::loop; static llvm::cl::OptionCategory clOptionsCategory(PASS_NAME " options"); static llvm::cl::opt<unsigned> @@ -52,9 +53,9 @@ struct ForLoopMapper : public FunctionPass<ForLoopMapper> { if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims))) signalPassFailure(); - } else if (auto forOp = dyn_cast<linalg::ForOp>(&op)) { - if (failed(convertLinalgLoopNestToGPULaunch(forOp, numBlockDims, - numThreadDims))) + } else if (auto forOp = dyn_cast<ForOp>(&op)) { + if 
(failed(convertLoopNestToGPULaunch(forOp, numBlockDims, + numThreadDims))) signalPassFailure(); } } |

