summary | refs | log | tree | commit | diff | stats
path: root/mlir/lib/Conversion/LoopsToGPU
diff options
context:
space:
mode:
author: Nicolas Vasilache <ntv@google.com>, 2019-07-16 01:46:23 -0700
committer: Mehdi Amini <joker.eph@gmail.com>, 2019-07-16 13:44:57 -0700
commit: e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07 (patch)
tree: bdd70056fd618fce4a0b44f34bd905c0f639c005 /mlir/lib/Conversion/LoopsToGPU
parent: dec1942cdf664347e85e668ee910694c15cf1855 (diff)
download: bcm5719-llvm-e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07.tar.gz
download: bcm5719-llvm-e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07.zip
Replace linalg.for by loop.for
With the introduction of the Loop dialect, uses of the `linalg.for` operation can now be subsumed 1-to-1 by `loop.for`. This CL performs the replacement and updates the tests accordingly.

PiperOrigin-RevId: 258322565
Diffstat (limited to 'mlir/lib/Conversion/LoopsToGPU')
-rw-r--r-- mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp 59
-rw-r--r-- mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp 9
2 files changed, 37 insertions, 31 deletions
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index 96ac947a1e0..ac164ab816f 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -23,10 +23,10 @@
#include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
#include "mlir/AffineOps/AffineOps.h"
+#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/GPU/GPUDialect.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
-#include "mlir/Linalg/IR/LinalgOps.h"
#include "mlir/StandardOps/Ops.h"
#include "mlir/Transforms/LowerAffine.h"
#include "mlir/Transforms/RegionUtils.h"
@@ -36,6 +36,7 @@
#define DEBUG_TYPE "loops-to-gpu"
using namespace mlir;
+using namespace mlir::loop;
// Extract an indexed value from KernelDim3.
static Value *getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) {
@@ -56,8 +57,8 @@ static Value *getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) {
static Operation::operand_range getLowerBoundOperands(AffineForOp forOp) {
return forOp.getLowerBoundOperands();
}
-static SmallVector<Value *, 1> getLowerBoundOperands(linalg::ForOp forOp) {
- SmallVector<Value *, 1> bounds(1, forOp.getLowerBound());
+static SmallVector<Value *, 1> getLowerBoundOperands(ForOp forOp) {
+ SmallVector<Value *, 1> bounds(1, forOp.lowerBound());
return bounds;
}
@@ -65,8 +66,8 @@ static SmallVector<Value *, 1> getLowerBoundOperands(linalg::ForOp forOp) {
static Operation::operand_range getUpperBoundOperands(AffineForOp forOp) {
return forOp.getUpperBoundOperands();
}
-static SmallVector<Value *, 1> getUpperBoundOperands(linalg::ForOp forOp) {
- SmallVector<Value *, 1> bounds(1, forOp.getUpperBound());
+static SmallVector<Value *, 1> getUpperBoundOperands(ForOp forOp) {
+ SmallVector<Value *, 1> bounds(1, forOp.upperBound());
return bounds;
}
@@ -75,17 +76,15 @@ static SmallVector<Value *, 1> getUpperBoundOperands(linalg::ForOp forOp) {
static Value *getOrCreateStep(AffineForOp forOp, OpBuilder &builder) {
return builder.create<ConstantIndexOp>(forOp.getLoc(), forOp.getStep());
}
-static Value *getOrCreateStep(linalg::ForOp forOp, OpBuilder &) {
- return forOp.getStep();
-}
+static Value *getOrCreateStep(ForOp forOp, OpBuilder &) { return forOp.step(); }
// Get a Value for the loop lower bound. If the value requires computation,
// materialize the instructions using builder.
static Value *getOrEmitLowerBound(AffineForOp forOp, OpBuilder &builder) {
return lowerAffineLowerBound(forOp, builder);
}
-static Value *getOrEmitLowerBound(linalg::ForOp forOp, OpBuilder &) {
- return forOp.getLowerBound();
+static Value *getOrEmitLowerBound(ForOp forOp, OpBuilder &) {
+ return forOp.lowerBound();
}
// Get a Value for the loop upper bound. If the value requires computation,
@@ -93,10 +92,16 @@ static Value *getOrEmitLowerBound(linalg::ForOp forOp, OpBuilder &) {
static Value *getOrEmitUpperBound(AffineForOp forOp, OpBuilder &builder) {
return lowerAffineUpperBound(forOp, builder);
}
-static Value *getOrEmitUpperBound(linalg::ForOp forOp, OpBuilder &) {
- return forOp.getUpperBound();
+static Value *getOrEmitUpperBound(ForOp forOp, OpBuilder &) {
+ return forOp.upperBound();
}
+// TODO(ntv): uniformize back once AffineForOp is in ODS.
+static Region &getRegion(ForOp op) { return op.region(); }
+static Region &getRegion(AffineForOp op) { return op.getRegion(); }
+static Block *getBody(ForOp op) { return op.body(); }
+static Block *getBody(AffineForOp op) { return op.getBody(); }
+
// Check the structure of the loop nest:
// - there are enough loops to map to numBlockDims + numThreadDims;
// - the loops are perfectly nested;
@@ -122,9 +127,9 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims,
}
OpTy currentLoop = forOp;
- Region &limit = forOp.getRegion();
+ Region &limit = getRegion(forOp);
for (unsigned i = 0, e = numBlockDims + numThreadDims; i < e; ++i) {
- Operation *nested = &currentLoop.getBody()->front();
+ Operation *nested = &getBody(currentLoop)->front();
if (!areValuesDefinedAbove(getLowerBoundOperands(currentLoop), limit) ||
!areValuesDefinedAbove(getUpperBoundOperands(currentLoop), limit))
return currentLoop.emitError(
@@ -136,9 +141,9 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims,
if (i == e - 1)
break;
- auto begin = currentLoop.getBody()->begin(),
- end = currentLoop.getBody()->end();
- if (currentLoop.getBody()->empty() || std::next(begin, 2) != end)
+ auto begin = getBody(currentLoop)->begin(),
+ end = getBody(currentLoop)->end();
+ if (getBody(currentLoop)->empty() || std::next(begin, 2) != end)
return currentLoop.emitError(
"expected perfectly nested loops in the body");
@@ -211,7 +216,7 @@ Optional<OpTy> LoopToGpuConverter::collectBounds(OpTy forOp,
steps.push_back(step);
if (i != numLoops - 1)
- currentLoop = cast<OpTy>(&currentLoop.getBody()->front());
+ currentLoop = cast<OpTy>(&getBody(currentLoop)->front());
}
return currentLoop;
}
@@ -243,7 +248,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
// Still assuming perfect nesting so there are no values other than induction
// variables that are defined in one loop and used in deeper loops.
llvm::SetVector<Value *> valuesToForwardSet;
- getUsedValuesDefinedAbove(innermostForOp.getRegion(), rootForOp.getRegion(),
+ getUsedValuesDefinedAbove(getRegion(innermostForOp), getRegion(rootForOp),
valuesToForwardSet);
auto valuesToForward = valuesToForwardSet.takeVector();
auto originallyForwardedValues = valuesToForward.size();
@@ -258,14 +263,14 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
// gpu return and move the operations from the loop body block to the gpu
// launch body block. Do not move the entire block because of the difference
// in block arguments.
- Operation &terminator = innermostForOp.getBody()->back();
+ Operation &terminator = getBody(innermostForOp)->back();
Location terminatorLoc = terminator.getLoc();
terminator.erase();
- builder.setInsertionPointToEnd(innermostForOp.getBody());
+ builder.setInsertionPointToEnd(getBody(innermostForOp));
builder.create<gpu::Return>(terminatorLoc);
launchOp.getBody().front().getOperations().splice(
launchOp.getBody().front().begin(),
- innermostForOp.getBody()->getOperations());
+ getBody(innermostForOp)->getOperations());
// Remap the loop iterators to use block/thread identifiers instead. Loops
// may iterate from LB with step S whereas GPU thread/block ids always iterate
@@ -328,11 +333,11 @@ static LogicalResult convertLoopNestToGPULaunch(OpTy forOp,
LogicalResult mlir::convertAffineLoopNestToGPULaunch(AffineForOp forOp,
unsigned numBlockDims,
unsigned numThreadDims) {
- return convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims);
+ return ::convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims);
}
-LogicalResult mlir::convertLinalgLoopNestToGPULaunch(linalg::ForOp forOp,
- unsigned numBlockDims,
- unsigned numThreadDims) {
- return convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims);
+LogicalResult mlir::convertLoopNestToGPULaunch(ForOp forOp,
+ unsigned numBlockDims,
+ unsigned numThreadDims) {
+ return ::convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims);
}
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
index 13e4171033e..7c785b5c995 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
@@ -18,7 +18,7 @@
#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h"
#include "mlir/AffineOps/AffineOps.h"
#include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
-#include "mlir/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Pass/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -26,6 +26,7 @@
#define PASS_NAME "convert-loops-to-gpu"
using namespace mlir;
+using namespace mlir::loop;
static llvm::cl::OptionCategory clOptionsCategory(PASS_NAME " options");
static llvm::cl::opt<unsigned>
@@ -52,9 +53,9 @@ struct ForLoopMapper : public FunctionPass<ForLoopMapper> {
if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
numThreadDims)))
signalPassFailure();
- } else if (auto forOp = dyn_cast<linalg::ForOp>(&op)) {
- if (failed(convertLinalgLoopNestToGPULaunch(forOp, numBlockDims,
- numThreadDims)))
+ } else if (auto forOp = dyn_cast<ForOp>(&op)) {
+ if (failed(convertLoopNestToGPULaunch(forOp, numBlockDims,
+ numThreadDims)))
signalPassFailure();
}
}
OpenPOWER on IntegriCloud