diff options
| author | Uday Bondhugula <bondhugula@google.com> | 2018-12-10 13:14:28 -0800 |
|---|---|---|
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 14:24:53 -0700 |
| commit | d59a95a05c4fdf15a5b676d852f6b790a931494e (patch) | |
| tree | 40e72777a2099bf237976a33e4df11cea9367d36 /mlir | |
| parent | 6757fb151d17d75f56d452916e2acfea3345ef78 (diff) | |
| download | bcm5719-llvm-d59a95a05c4fdf15a5b676d852f6b790a931494e.tar.gz bcm5719-llvm-d59a95a05c4fdf15a5b676d852f6b790a931494e.zip | |
Fix missing check for dependent DMAs in pipeline-data-transfer
- add a conservative check for now (TODO: use the dependence analysis pass
once the latter is extended to deal with DMA ops); this resolves an existing
bug on a test case.
- update test cases
PiperOrigin-RevId: 224869526
Diffstat (limited to 'mlir')
| -rw-r--r-- | mlir/lib/Transforms/PipelineDataTransfer.cpp | 28 | ||||
| -rw-r--r-- | mlir/test/Transforms/pipeline-data-transfer.mlir | 33 |
2 files changed, 53 insertions, 8 deletions
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp index fc97aa8d2d2..a3455de2039 100644 --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -181,6 +181,19 @@ static void findMatchingStartFinishStmts( ForStmt *forStmt, SmallVectorImpl<std::pair<OperationStmt *, OperationStmt *>> &startWaitPairs) { + + // Collect outgoing DMA statements - needed to check for dependences below. + SmallVector<OpPointer<DmaStartOp>, 4> outgoingDmaOps; + for (auto &stmt : *forStmt) { + auto *opStmt = dyn_cast<OperationStmt>(&stmt); + if (!opStmt) + continue; + OpPointer<DmaStartOp> dmaStartOp; + if ((dmaStartOp = opStmt->dyn_cast<DmaStartOp>()) && + dmaStartOp->isSrcMemorySpaceFaster()) + outgoingDmaOps.push_back(dmaStartOp); + } + SmallVector<OperationStmt *, 4> dmaStartStmts, dmaFinishStmts; for (auto &stmt : *forStmt) { auto *opStmt = dyn_cast<OperationStmt>(&stmt); @@ -194,11 +207,22 @@ static void findMatchingStartFinishStmts( OpPointer<DmaStartOp> dmaStartOp; if (!(dmaStartOp = opStmt->dyn_cast<DmaStartOp>())) continue; - // Only DMAs incoming into higher memory spaces. - // TODO(bondhugula): outgoing DMAs. + // Only DMAs incoming into higher memory spaces are pipelined for now. + // TODO(bondhugula): handle outgoing DMA pipelining. if (!dmaStartOp->isDestMemorySpaceFaster()) continue; + // Check for dependence with outgoing DMAs. Doing this conservatively. + // TODO(andydavis,bondhugula): use the dependence analysis to check for + // dependences between an incoming and outgoing DMA in the same iteration. + auto it = outgoingDmaOps.begin(); + for (; it != outgoingDmaOps.end(); ++it) { + if ((*it)->getDstMemRef() == dmaStartOp->getSrcMemRef()) + break; + } + if (it != outgoingDmaOps.end()) + continue; + // We only double buffer if the buffer is not live out of loop. 
const MLValue *memref = cast<MLValue>(dmaStartOp->getOperand(dmaStartOp->getFasterMemPos())); diff --git a/mlir/test/Transforms/pipeline-data-transfer.mlir b/mlir/test/Transforms/pipeline-data-transfer.mlir index 70468f7e6c1..2fa583ae33b 100644 --- a/mlir/test/Transforms/pipeline-data-transfer.mlir +++ b/mlir/test/Transforms/pipeline-data-transfer.mlir @@ -117,13 +117,34 @@ mlfunc @loop_dma_nested(%arg0 : memref<512x32xvector<8xf32>, #map0>, %arg1 : mem // CHECK: dma_wait %37[ // CHECK: for %i6 = 0 to 4 { - // The DMAs below are outgoing DMAs on arg2, not yet overlapped. - // CHECK: dma_start %0{{.*}}, %arg2[ - // CHECK-NEXT: dma_wait %1[ - dma_start %2[%c0, %c0], %arg2[%6#0, %6#1], %num_elts, %5[%c0] : memref<64x4xvector<8xf32>, #map0, 2>, memref<512x32xvector<8xf32>, #map0>, memref<2xi32> + } // CHECK: } + return // CHECK-NEXT: return +} + +// CHECK: mlfunc @loop_dma_dependent +mlfunc @loop_dma_dependent(%arg2 : memref<512x32xvector<8xf32>>) { + %num_elts = constant 256 : index + %c0 = constant 0 : index + %0 = alloc() : memref<64x4xvector<8xf32>, 2> + %1 = alloc() : memref<64x4xvector<8xf32>, 2> + %2 = alloc() : memref<64x4xvector<8xf32>, 2> + %3 = alloc() : memref<2xi32> + %4 = alloc() : memref<2xi32> + %5 = alloc() : memref<2xi32> + + // The two DMAs below are dependent (incoming and outgoing on the same + // memref) in the same iteration; so no pipelining here. + // CHECK-NOT: dma_start + // CHECK: for %i0 = 0 to 8 { + for %i0 = 0 to 8 { + %6 = affine_apply #map2(%i0) + dma_start %arg2[%6#0, %6#1], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32> dma_wait %5[%c0], %num_elts : memref<2xi32> - } // CHECK } - return + + dma_start %2[%c0, %c0], %arg2[%6#0, %6#1], %num_elts, %5[%c0] : memref<64x4xvector<8xf32>, 2>, memref<512x32xvector<8xf32>>, memref<2xi32> + dma_wait %5[%c0], %num_elts : memref<2xi32> + } // CHECK: } + return // CHECK-NEXT: return } // CHECK-LABEL: mlfunc @escaping_use |

