summaryrefslogtreecommitdiffstats
path: root/mlir/lib/Transforms/LoopUnrollAndJam.cpp
diff options
context:
space:
mode:
author: Uday Bondhugula <udayb@iisc.ac.in> 2019-08-28 23:42:17 -0700
committer: A. Unique TensorFlower <gardener@tensorflow.org> 2019-08-28 23:42:50 -0700
commit: bc2a543225abdd3876ae0b31fac89a3b31872d86 (patch)
tree: 578e1fa8cfbfe392ad7639a2027667513ba730f7 /mlir/lib/Transforms/LoopUnrollAndJam.cpp
parent: e90542c03b998800f61272b56a12f2f98c47aca7 (diff)
downloadbcm5719-llvm-bc2a543225abdd3876ae0b31fac89a3b31872d86.tar.gz
bcm5719-llvm-bc2a543225abdd3876ae0b31fac89a3b31872d86.zip
fix loop unroll and jam - operand mapping - imperfect nest case
- Fix operand mapping while cloning sub-blocks to jam: it was incorrect for imperfect nests where a def/use pair spanned sub-blocks.
- Strengthen/generalize the first test case to cover the previously missed scenario.
- Clean up the other cases while on this.

Previously, unroll-jamming the following nest

```
affine.for %arg0 = 0 to 2048 {
  %0 = alloc() : memref<512x10xf32>
  affine.for %arg1 = 0 to 10 {
    %1 = affine.load %0[%arg0, %arg1] : memref<512x10xf32>
  }
  dealloc %0 : memref<512x10xf32>
}
```

would yield

```
%0 = alloc() : memref<512x10xf32>
%1 = affine.apply #map0(%arg0)
%2 = alloc() : memref<512x10xf32>
affine.for %arg1 = 0 to 10 {
  %4 = affine.load %0[%arg0, %arg1] : memref<512x10xf32>
  %5 = affine.apply #map0(%arg0)
  %6 = affine.load %0[%5, %arg1] : memref<512x10xf32>
}
dealloc %0 : memref<512x10xf32>
%3 = affine.apply #map0(%arg0)
dealloc %0 : memref<512x10xf32>
```

instead of

```
module {
  affine.for %arg0 = 0 to 2048 step 2 {
    %0 = alloc() : memref<512x10xf32>
    %1 = affine.apply #map0(%arg0)
    %2 = alloc() : memref<512x10xf32>
    affine.for %arg1 = 0 to 10 {
      %4 = affine.load %0[%arg0, %arg1] : memref<512x10xf32>
      %5 = affine.apply #map0(%arg0)
      %6 = affine.load %2[%5, %arg1] : memref<512x10xf32>
    }
    dealloc %0 : memref<512x10xf32>
    %3 = affine.apply #map0(%arg0)
    dealloc %2 : memref<512x10xf32>
  }
}
```

That is, in the incorrect output the second (jammed) copy loaded from and deallocated the first copy's buffer `%0` instead of its own buffer `%2`, because the operand mapping did not persist from the sub-block that defines the buffer to the sub-blocks that use it.

Signed-off-by: Uday Bondhugula <uday@polymagelabs.com>

Closes tensorflow/mlir#98

COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/98 from bondhugula:ujam ddbc853f69b5608b3e8ff9b5ac1f6a5a0bb315a4
PiperOrigin-RevId: 266073460
Diffstat (limited to 'mlir/lib/Transforms/LoopUnrollAndJam.cpp')
-rw-r--r-- mlir/lib/Transforms/LoopUnrollAndJam.cpp 16
1 files changed, 8 insertions, 8 deletions
diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
index 3e92ad739e8..b6b2f3d4ad7 100644
--- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
@@ -209,14 +209,14 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
forOp.setStep(step * unrollJamFactor);
auto *forOpIV = forOp.getInductionVar();
- for (auto &subBlock : subBlocks) {
- // Builder to insert unroll-jammed bodies. Insert right at the end of
- // sub-block.
- OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
-
- // Unroll and jam (appends unrollJamFactor-1 additional copies).
- for (unsigned i = 1; i < unrollJamFactor; i++) {
- BlockAndValueMapping operandMapping;
+ // Unroll and jam (appends unrollJamFactor-1 additional copies).
+ for (unsigned i = 1; i < unrollJamFactor; i++) {
+ // Operand map persists across all sub-blocks.
+ BlockAndValueMapping operandMapping;
+ for (auto &subBlock : subBlocks) {
+ // Builder to insert unroll-jammed bodies. Insert right at the end of
+ // sub-block.
+ OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
// If the induction variable is used, create a remapping to the value for
// this unrolled instance.
OpenPOWER on IntegriCloud