diff options
| author | Nicolas Vasilache <ntv@google.com> | 2019-07-16 01:46:23 -0700 |
|---|---|---|
| committer | Mehdi Amini <joker.eph@gmail.com> | 2019-07-16 13:44:57 -0700 |
| commit | e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07 (patch) | |
| tree | bdd70056fd618fce4a0b44f34bd905c0f639c005 /mlir/lib/Conversion/LoopsToGPU | |
| parent | dec1942cdf664347e85e668ee910694c15cf1855 (diff) | |
| download | bcm5719-llvm-e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07.tar.gz bcm5719-llvm-e78ea03b24badc24bb1eec2c2bcccac5c8a1bd07.zip | |
Replace linalg.for by loop.for
With the introduction of the Loop dialect, uses of the `linalg.for` operation can now be subsumed 1-to-1 by `loop.for`.
This CL performs the replacement and tests are updated accordingly.
PiperOrigin-RevId: 258322565
Diffstat (limited to 'mlir/lib/Conversion/LoopsToGPU')
| -rw-r--r-- | mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp | 59 | ||||
| -rw-r--r-- | mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp | 9 |
2 files changed, 37 insertions, 31 deletions
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp index 96ac947a1e0..ac164ab816f 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -23,10 +23,10 @@ #include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h" #include "mlir/AffineOps/AffineOps.h" +#include "mlir/Dialect/LoopOps/LoopOps.h" #include "mlir/GPU/GPUDialect.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/Builders.h" -#include "mlir/Linalg/IR/LinalgOps.h" #include "mlir/StandardOps/Ops.h" #include "mlir/Transforms/LowerAffine.h" #include "mlir/Transforms/RegionUtils.h" @@ -36,6 +36,7 @@ #define DEBUG_TYPE "loops-to-gpu" using namespace mlir; +using namespace mlir::loop; // Extract an indexed value from KernelDim3. static Value *getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) { @@ -56,8 +57,8 @@ static Value *getDim3Value(const gpu::KernelDim3 &dim3, unsigned pos) { static Operation::operand_range getLowerBoundOperands(AffineForOp forOp) { return forOp.getLowerBoundOperands(); } -static SmallVector<Value *, 1> getLowerBoundOperands(linalg::ForOp forOp) { - SmallVector<Value *, 1> bounds(1, forOp.getLowerBound()); +static SmallVector<Value *, 1> getLowerBoundOperands(ForOp forOp) { + SmallVector<Value *, 1> bounds(1, forOp.lowerBound()); return bounds; } @@ -65,8 +66,8 @@ static SmallVector<Value *, 1> getLowerBoundOperands(linalg::ForOp forOp) { static Operation::operand_range getUpperBoundOperands(AffineForOp forOp) { return forOp.getUpperBoundOperands(); } -static SmallVector<Value *, 1> getUpperBoundOperands(linalg::ForOp forOp) { - SmallVector<Value *, 1> bounds(1, forOp.getUpperBound()); +static SmallVector<Value *, 1> getUpperBoundOperands(ForOp forOp) { + SmallVector<Value *, 1> bounds(1, forOp.upperBound()); return bounds; } @@ -75,17 +76,15 @@ static SmallVector<Value *, 1> getUpperBoundOperands(linalg::ForOp forOp) { static Value *getOrCreateStep(AffineForOp forOp, OpBuilder 
&builder) { return builder.create<ConstantIndexOp>(forOp.getLoc(), forOp.getStep()); } -static Value *getOrCreateStep(linalg::ForOp forOp, OpBuilder &) { - return forOp.getStep(); -} +static Value *getOrCreateStep(ForOp forOp, OpBuilder &) { return forOp.step(); } // Get a Value for the loop lower bound. If the value requires computation, // materialize the instructions using builder. static Value *getOrEmitLowerBound(AffineForOp forOp, OpBuilder &builder) { return lowerAffineLowerBound(forOp, builder); } -static Value *getOrEmitLowerBound(linalg::ForOp forOp, OpBuilder &) { - return forOp.getLowerBound(); +static Value *getOrEmitLowerBound(ForOp forOp, OpBuilder &) { + return forOp.lowerBound(); } // Get a Value for the loop upper bound. If the value requires computation, @@ -93,10 +92,16 @@ static Value *getOrEmitLowerBound(linalg::ForOp forOp, OpBuilder &) { static Value *getOrEmitUpperBound(AffineForOp forOp, OpBuilder &builder) { return lowerAffineUpperBound(forOp, builder); } -static Value *getOrEmitUpperBound(linalg::ForOp forOp, OpBuilder &) { - return forOp.getUpperBound(); +static Value *getOrEmitUpperBound(ForOp forOp, OpBuilder &) { + return forOp.upperBound(); } +// TODO(ntv): uniformize back once AffineForOp is in ODS. 
+static Region &getRegion(ForOp op) { return op.region(); } +static Region &getRegion(AffineForOp op) { return op.getRegion(); } +static Block *getBody(ForOp op) { return op.body(); } +static Block *getBody(AffineForOp op) { return op.getBody(); } + // Check the structure of the loop nest: // - there are enough loops to map to numBlockDims + numThreadDims; // - the loops are perfectly nested; @@ -122,9 +127,9 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, } OpTy currentLoop = forOp; - Region &limit = forOp.getRegion(); + Region &limit = getRegion(forOp); for (unsigned i = 0, e = numBlockDims + numThreadDims; i < e; ++i) { - Operation *nested = &currentLoop.getBody()->front(); + Operation *nested = &getBody(currentLoop)->front(); if (!areValuesDefinedAbove(getLowerBoundOperands(currentLoop), limit) || !areValuesDefinedAbove(getUpperBoundOperands(currentLoop), limit)) return currentLoop.emitError( @@ -136,9 +141,9 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, if (i == e - 1) break; - auto begin = currentLoop.getBody()->begin(), - end = currentLoop.getBody()->end(); - if (currentLoop.getBody()->empty() || std::next(begin, 2) != end) + auto begin = getBody(currentLoop)->begin(), + end = getBody(currentLoop)->end(); + if (getBody(currentLoop)->empty() || std::next(begin, 2) != end) return currentLoop.emitError( "expected perfectly nested loops in the body"); @@ -211,7 +216,7 @@ Optional<OpTy> LoopToGpuConverter::collectBounds(OpTy forOp, steps.push_back(step); if (i != numLoops - 1) - currentLoop = cast<OpTy>(&currentLoop.getBody()->front()); + currentLoop = cast<OpTy>(&getBody(currentLoop)->front()); } return currentLoop; } @@ -243,7 +248,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, // Still assuming perfect nesting so there are no values other than induction // variables that are defined in one loop and used in deeper loops. 
llvm::SetVector<Value *> valuesToForwardSet; - getUsedValuesDefinedAbove(innermostForOp.getRegion(), rootForOp.getRegion(), + getUsedValuesDefinedAbove(getRegion(innermostForOp), getRegion(rootForOp), valuesToForwardSet); auto valuesToForward = valuesToForwardSet.takeVector(); auto originallyForwardedValues = valuesToForward.size(); @@ -258,14 +263,14 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp, // gpu return and move the operations from the loop body block to the gpu // launch body block. Do not move the entire block because of the difference // in block arguments. - Operation &terminator = innermostForOp.getBody()->back(); + Operation &terminator = getBody(innermostForOp)->back(); Location terminatorLoc = terminator.getLoc(); terminator.erase(); - builder.setInsertionPointToEnd(innermostForOp.getBody()); + builder.setInsertionPointToEnd(getBody(innermostForOp)); builder.create<gpu::Return>(terminatorLoc); launchOp.getBody().front().getOperations().splice( launchOp.getBody().front().begin(), - innermostForOp.getBody()->getOperations()); + getBody(innermostForOp)->getOperations()); // Remap the loop iterators to use block/thread identifiers instead. 
Loops // may iterate from LB with step S whereas GPU thread/block ids always iterate @@ -328,11 +333,11 @@ static LogicalResult convertLoopNestToGPULaunch(OpTy forOp, LogicalResult mlir::convertAffineLoopNestToGPULaunch(AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims) { - return convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); + return ::convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); } -LogicalResult mlir::convertLinalgLoopNestToGPULaunch(linalg::ForOp forOp, - unsigned numBlockDims, - unsigned numThreadDims) { - return convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); +LogicalResult mlir::convertLoopNestToGPULaunch(ForOp forOp, + unsigned numBlockDims, + unsigned numThreadDims) { + return ::convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims); } diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp index 13e4171033e..7c785b5c995 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp @@ -18,7 +18,7 @@ #include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h" #include "mlir/AffineOps/AffineOps.h" #include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h" -#include "mlir/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/LoopOps/LoopOps.h" #include "mlir/Pass/Pass.h" #include "llvm/Support/CommandLine.h" @@ -26,6 +26,7 @@ #define PASS_NAME "convert-loops-to-gpu" using namespace mlir; +using namespace mlir::loop; static llvm::cl::OptionCategory clOptionsCategory(PASS_NAME " options"); static llvm::cl::opt<unsigned> @@ -52,9 +53,9 @@ struct ForLoopMapper : public FunctionPass<ForLoopMapper> { if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims))) signalPassFailure(); - } else if (auto forOp = dyn_cast<linalg::ForOp>(&op)) { - if (failed(convertLinalgLoopNestToGPULaunch(forOp, numBlockDims, - numThreadDims))) + } else if (auto forOp = dyn_cast<ForOp>(&op)) { + if 
(failed(convertLoopNestToGPULaunch(forOp, numBlockDims, + numThreadDims))) signalPassFailure(); } } |

