| author | Uday Bondhugula <bondhugula@google.com> | 2019-02-11 16:33:53 -0800 |
|---|---|---|
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 16:24:08 -0700 |
| commit | 8b3f841daf3ad728eab81c73b8fafdc050627fa6 (patch) | |
| tree | 87ea816e56dd331f72ec4d9c2a7bc86cf5a36a11 /mlir/lib | |
| parent | f5eed89df06fbaf8c1dc7241bbbcfba8b5dea6ea (diff) | |
Generate dealloc's for the alloc's of dma-generate.
- for the DMA buffers being allocated (and their tags), generate corresponding deallocs
- minor related update to replaceAllMemRefUsesWith and PipelineDataTransfer pass
Code generation for DMA transfers was being done with the initial simplifying
assumption that the alloc's would map to scoped allocations, and so no
deallocations would be necessary. Drop this assumption to generalize. Note that
even with scoped allocations, unrolling loops that contain them could create a
series of allocations and exhaust fast memory. Having an end-of-lifetime marker
like a dealloc in fact allows creating new scopes if necessary when lowering to
a backend, while still utilizing scoped allocation.
DMA buffers created by -dma-generate are guaranteed to have either
non-overlapping lifetimes or nested lifetimes.
PiperOrigin-RevId: 233502632
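
For illustration, the IR produced by -dma-generate now has roughly the shape below. This is a hand-written sketch in the standard-dialect syntax of the time, not IR taken from this commit's test cases; %A, the sizes, and the constants are all illustrative:

```mlir
// Hypothetical sketch of -dma-generate output after this change; %A is a
// slow-memory memref argument, names and sizes are illustrative.
%zero = constant 0 : index
%num = constant 256 : index
%buf = alloc() : memref<256xf32, 1>   // fast-space DMA buffer
%tag = alloc() : memref<1xi32>        // single-element DMA tag
dma_start %A[%zero], %buf[%zero], %num, %tag[%zero]
    : memref<256xf32>, memref<256xf32, 1>, memref<1xi32>
dma_wait %tag[%zero], %num : memref<1xi32>
// ... loads/stores on %buf ...
dealloc %tag : memref<1xi32>          // newly generated
dealloc %buf : memref<256xf32, 1>     // newly generated
```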
Diffstat (limited to 'mlir/lib')
| -rw-r--r-- | mlir/lib/Transforms/DmaGeneration.cpp | 23 |
|---|---|---|
| -rw-r--r-- | mlir/lib/Transforms/PipelineDataTransfer.cpp | 21 |
| -rw-r--r-- | mlir/lib/Transforms/Utils/Utils.cpp | 5 |
3 files changed, 36 insertions, 13 deletions
```diff
diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp
index 45e57416111..29cc435a8a9 100644
--- a/mlir/lib/Transforms/DmaGeneration.cpp
+++ b/mlir/lib/Transforms/DmaGeneration.cpp
@@ -59,8 +59,6 @@ namespace {
 /// by the latter. Only load op's handled for now.
 // TODO(bondhugula): We currently can't generate DMAs correctly when stores are
 // strided. Check for strided stores.
-// TODO(mlir-team): we don't insert dealloc's for the DMA buffers; this is thus
-// natural only for scoped allocations.
 struct DmaGeneration : public FunctionPass {
   explicit DmaGeneration(
       unsigned slowMemorySpace = 0, unsigned fastMemorySpace = 1,
@@ -331,10 +329,8 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
 
   Value *fastMemRef;
 
   // Check if a buffer was already created.
-  // TODO(bondhugula): union across all memory op's per buffer. For now assuming
-  // that multiple memory op's on the same memref have the *same* memory
-  // footprint.
-  if (fastBufferMap.count(memref) == 0) {
+  bool existingBuf = fastBufferMap.count(memref) > 0;
+  if (!existingBuf) {
     auto fastMemRefType = top.getMemRefType(
         fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);
@@ -358,6 +354,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
 
   // Create a tag (single element 1-d memref) for the DMA.
   auto tagMemRefType = top.getMemRefType({1}, top.getIntegerType(32));
   auto tagMemRef = prologue.create<AllocOp>(loc, tagMemRefType);
+
   auto numElementsSSA =
       top.create<ConstantIndexOp>(loc, numElements.getValue());
@@ -397,13 +394,23 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
                             zeroIndex, stride, numEltPerStride);
     // Since new ops are being appended (for outgoing DMAs), adjust the end to
     // mark end of range of the original.
-    if (*nEnd == end)
-      *nEnd = Block::iterator(op->getInstruction());
+    *nEnd = Block::iterator(op->getInstruction());
   }
 
   // Matching DMA wait to block on completion; tag always has a 0 index.
   b->create<DmaWaitOp>(loc, tagMemRef, zeroIndex, numElementsSSA);
 
+  // Generate dealloc for the tag.
+  auto tagDeallocOp = epilogue.create<DeallocOp>(loc, tagMemRef);
+  if (*nEnd == end)
+    // Since new ops are being appended (for outgoing DMAs), adjust the end to
+    // mark end of range of the original.
+    *nEnd = Block::iterator(tagDeallocOp->getInstruction());
+
+  // Generate dealloc for the DMA buffer.
+  if (!existingBuf)
+    epilogue.create<DeallocOp>(loc, fastMemRef);
+
   // Replace all uses of the old memref with the faster one while remapping
   // access indices (subtracting out lower bound offsets for each dimension).
   // Ex: to replace load %A[%i, %j] with load %Abuf[%i - %iT, %j - %jT],
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index cfa045f2279..5c2e38205e7 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -124,8 +124,9 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
 
   // replaceAllMemRefUsesWith will always succeed unless the forOp body has
   // non-deferencing uses of the memref.
-  if (!replaceAllMemRefUsesWith(oldMemRef, newMemRef, {ivModTwoOp}, AffineMap(),
-                                {}, &*forOp->getBody()->begin())) {
+  if (!replaceAllMemRefUsesWith(
+          oldMemRef, newMemRef, {ivModTwoOp}, AffineMap(), {},
+          /*domInstFilter=*/&*forOp->getBody()->begin())) {
     LLVM_DEBUG(llvm::dbgs()
                << "memref replacement for double buffering failed\n";);
     ivModTwoOp->getInstruction()->erase();
@@ -284,10 +285,20 @@ PipelineDataTransfer::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
     // If the old memref has no more uses, remove its 'dead' alloc if it was
     // alloc'ed. (note: DMA buffers are rarely function live-in; but a 'dim'
     // operation could have been used on it if it was dynamically shaped in
-    // order to create the double buffer above)
-    if (oldMemRef->use_empty())
-      if (auto *allocInst = oldMemRef->getDefiningInst())
+    // order to create the double buffer above.)
+    // '-canonicalize' does this in a more general way, but we'll anyway do the
+    // simple/common case so that the output / test cases looks clear.
+    if (auto *allocInst = oldMemRef->getDefiningInst()) {
+      if (oldMemRef->use_empty()) {
         allocInst->erase();
+      } else if (oldMemRef->hasOneUse()) {
+        auto *singleUse = oldMemRef->use_begin()->getOwner();
+        if (singleUse->isa<DeallocOp>()) {
+          singleUse->erase();
+          oldMemRef->getDefiningInst()->erase();
+        }
+      }
+    }
   }
 
   // Double the buffers for tag memrefs.
diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp
index 41689be52fc..519885b3a50 100644
--- a/mlir/lib/Transforms/Utils/Utils.cpp
+++ b/mlir/lib/Transforms/Utils/Utils.cpp
@@ -91,6 +91,11 @@ bool mlir::replaceAllMemRefUsesWith(const Value *oldMemRef, Value *newMemRef,
         !postDomInfo->postDominates(postDomInstFilter, opInst))
       continue;
 
+    // Skip dealloc's - no replacement is necessary, and a replacement doesn't
+    // hurt dealloc's.
+    if (opInst->isa<DeallocOp>())
+      continue;
+
     // Check if the memref was used in a non-deferencing context. It is fine for
     // the memref to be used in a non-deferencing way outside of the region
     // where this replacement is happening.
```
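
To see why the PipelineDataTransfer change looks for a memref whose single remaining use is a dealloc: after double buffering, every dereferencing use of the old buffer has been rewritten to the new one, so the only use that can survive is the dealloc that -dma-generate now emits. A hypothetical sketch of the dead pair the pass erases (names illustrative, not from the commit's tests):

```mlir
// Hypothetical leftover after double buffering: %buf has no loads/stores
// left, only its dealloc, so the alloc/dealloc pair is erased together.
%buf = alloc() : memref<256xf32, 1>
dealloc %buf : memref<256xf32, 1>
```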
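Likewise, the Utils.cpp change inserts an early `continue` so that replaceAllMemRefUsesWith leaves dealloc's untouched before the non-dereferencing-use check that follows: the dealloc stays attached to the old memref it was created for. A hypothetical sketch (names illustrative):

```mlir
// Hypothetical: rewriting uses of %old to a new buffer updates the load
// but leaves the dealloc on %old, so %old can still be freed.
%v = load %old[%i] : memref<256xf32, 1>   // rewritten by the utility
dealloc %old : memref<256xf32, 1>         // skipped; remains on %old
```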

