Support lowering of imperfectly nested loops into GPU dialect.

The current lowering of loops to GPU only supports loop nests where the loops mapped to workgroups and workitems are perfectly nested. Here a new lowering is added to handle imperfectly nested loop bodies with the following properties: 1) The loops partitioned to workgroups are perfectly nested. 2) The loop body of the innermost loop partitioned to workgroups can contain one or more loop nests that are to be partitioned across workitems. Each individual loop nest partitioned to workitems should also be perfectly nested. 3) The numbers of workgroups and workitems are not deduced from the loop bounds but are passed in by the caller of the lowering as values. 4) For statements within the perfectly nested loop nest partitioned across workgroups that are not loops, it is valid to have all threads execute that statement. This is NOT verified. PiperOrigin-RevId: 277958868
author: Mahesh Ravishankar <ravishankarm@google.com> 2019-11-01 10:51:33 -0700
committer: A. Unique TensorFlower <gardener@tensorflow.org> 2019-11-01 10:52:06 -0700
commit: 9cbbd8f4dfa47d84bd7531b255f065762b981fba (patch)
tree: 7830c46295312a3393d3e32bdfd8f444bc2ef218 /mlir/lib/Transforms/Utils/LoopUtils.cpp
parent: bd94a10c02a641e59c5ccfec143f728e13b516c2 (diff)
download: bcm5719-llvm-9cbbd8f4dfa47d84bd7531b255f065762b981fba.tar.gz
bcm5719-llvm-9cbbd8f4dfa47d84bd7531b255f065762b981fba.zip
1 files changed, 4 insertions, 3 deletions
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index e09d8c89b37..405116e72e7 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -1118,11 +1118,12 @@ void mlir::mapLoopToProcessorIds(loop::ForOp forOp,
   for (unsigned i = 1, e = processorId.size(); i < e; ++i)
     mul = b.create<AddIOp>(loc, b.create<MulIOp>(loc, mul, numProcessors[i]),
                            processorId[i]);
-  Value *lb = b.create<AddIOp>(loc, forOp.lowerBound(), mul);
+  Value *lb = b.create<AddIOp>(loc, forOp.lowerBound(),
+                               b.create<MulIOp>(loc, forOp.step(), mul));
   forOp.setLowerBound(lb);
 
-  Value *step = numProcessors.front();
-  for (auto *numProcs : numProcessors.drop_front())
+  Value *step = forOp.step();
+  for (auto *numProcs : numProcessors)
     step = b.create<MulIOp>(loc, step, numProcs);
   forOp.setStep(step);
 }
author	Mahesh Ravishankar <ravishankarm@google.com>	2019-11-01 10:51:33 -0700
committer	A. Unique TensorFlower <gardener@tensorflow.org>	2019-11-01 10:52:06 -0700
commit	9cbbd8f4dfa47d84bd7531b255f065762b981fba (patch)
tree	7830c46295312a3393d3e32bdfd8f444bc2ef218 /mlir/lib/Transforms/Utils/LoopUtils.cpp
parent	bd94a10c02a641e59c5ccfec143f728e13b516c2 (diff)
download	bcm5719-llvm-9cbbd8f4dfa47d84bd7531b255f065762b981fba.tar.gz bcm5719-llvm-9cbbd8f4dfa47d84bd7531b255f065762b981fba.zip