author    Uday Bondhugula <bondhugula@google.com>   2019-02-11 16:33:53 -0800
committer jpienaar <jpienaar@google.com>            2019-03-29 16:24:08 -0700
commit    8b3f841daf3ad728eab81c73b8fafdc050627fa6 (patch)
tree      87ea816e56dd331f72ec4d9c2a7bc86cf5a36a11 /mlir/lib
parent    f5eed89df06fbaf8c1dc7241bbbcfba8b5dea6ea (diff)
Generate deallocs for the allocs of dma-generate.
- For the DMA buffers being allocated (and their tags), generate corresponding deallocs.
- Minor related update to replaceAllMemRefUsesWith and the PipelineDataTransfer pass.

Code generation for DMA transfers was being done with the initial simplifying assumption that the allocs would map to scoped allocations, and so no deallocations would be necessary. Drop this assumption to generalize. Note that even with scoped allocations, unrolling loops that contain scoped allocations could create a series of allocations and exhaust fast memory. Having an end-of-lifetime marker such as a dealloc in fact allows new scopes to be created if necessary when lowering to a backend, while still utilizing scoped allocation. DMA buffers created by -dma-generate are guaranteed to have either non-overlapping or nested lifetimes.

PiperOrigin-RevId: 233502632
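To illustrate the shape of the change, here is a minimal sketch (not code from the patch) of the prologue/epilogue structure that -dma-generate now emits, distilled from the DmaGeneration.cpp hunk below. The builder type FuncBuilder, the function name, its parameters, and the include paths reflect the MLIR tree of this era and are assumptions for illustration; the individual create<> calls are the ones visible in the diff.

// Include paths assumed for the MLIR tree at this revision.
#include "mlir/IR/Builders.h"
#include "mlir/StandardOps/StandardOps.h"

using namespace mlir;

// Sketch: allocate the fast-memory DMA buffer and its tag in the prologue,
// and pair each alloc with a dealloc in the epilogue so every buffer gets
// an explicit end-of-lifetime marker.
static void emitBufferWithDeallocs(FuncBuilder &prologue,
                                   FuncBuilder &epilogue, FuncBuilder &top,
                                   Location loc, MemRefType fastMemRefType) {
  // Fast buffer holding the DMA'd region (present before this patch too).
  auto fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType);

  // Single-element 1-d i32 tag memref used to wait on DMA completion.
  auto tagMemRefType = top.getMemRefType({1}, top.getIntegerType(32));
  auto tagMemRef = prologue.create<AllocOp>(loc, tagMemRefType);

  // ... dma_start, the rewritten computation, and dma_wait go here ...

  // New with this patch: matching deallocs at the end of each lifetime.
  epilogue.create<DeallocOp>(loc, tagMemRef);
  epilogue.create<DeallocOp>(loc, fastMemRef);
}

Because the generated buffers have non-overlapping or nested lifetimes, a backend can still map these alloc/dealloc pairs back onto scoped allocations when lowering.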
Diffstat (limited to 'mlir/lib')
-rw-r--r--  mlir/lib/Transforms/DmaGeneration.cpp        | 23
-rw-r--r--  mlir/lib/Transforms/PipelineDataTransfer.cpp | 21
-rw-r--r--  mlir/lib/Transforms/Utils/Utils.cpp          |  5
3 files changed, 36 insertions, 13 deletions
diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp
index 45e57416111..29cc435a8a9 100644
--- a/mlir/lib/Transforms/DmaGeneration.cpp
+++ b/mlir/lib/Transforms/DmaGeneration.cpp
@@ -59,8 +59,6 @@ namespace {
/// by the latter. Only load op's handled for now.
// TODO(bondhugula): We currently can't generate DMAs correctly when stores are
// strided. Check for strided stores.
-// TODO(mlir-team): we don't insert dealloc's for the DMA buffers; this is thus
-// natural only for scoped allocations.
struct DmaGeneration : public FunctionPass {
explicit DmaGeneration(
unsigned slowMemorySpace = 0, unsigned fastMemorySpace = 1,
@@ -331,10 +329,8 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
Value *fastMemRef;
// Check if a buffer was already created.
- // TODO(bondhugula): union across all memory op's per buffer. For now assuming
- // that multiple memory op's on the same memref have the *same* memory
- // footprint.
- if (fastBufferMap.count(memref) == 0) {
+ bool existingBuf = fastBufferMap.count(memref) > 0;
+ if (!existingBuf) {
auto fastMemRefType = top.getMemRefType(
fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);
@@ -358,6 +354,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
// Create a tag (single element 1-d memref) for the DMA.
auto tagMemRefType = top.getMemRefType({1}, top.getIntegerType(32));
auto tagMemRef = prologue.create<AllocOp>(loc, tagMemRefType);
+
auto numElementsSSA =
top.create<ConstantIndexOp>(loc, numElements.getValue());
@@ -397,13 +394,23 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
zeroIndex, stride, numEltPerStride);
// Since new ops are being appended (for outgoing DMAs), adjust the end to
// mark end of range of the original.
- if (*nEnd == end)
- *nEnd = Block::iterator(op->getInstruction());
+ *nEnd = Block::iterator(op->getInstruction());
}
// Matching DMA wait to block on completion; tag always has a 0 index.
b->create<DmaWaitOp>(loc, tagMemRef, zeroIndex, numElementsSSA);
+ // Generate dealloc for the tag.
+ auto tagDeallocOp = epilogue.create<DeallocOp>(loc, tagMemRef);
+ if (*nEnd == end)
+ // Since new ops are being appended (for outgoing DMAs), adjust the end to
+ // mark end of range of the original.
+ *nEnd = Block::iterator(tagDeallocOp->getInstruction());
+
+ // Generate dealloc for the DMA buffer.
+ if (!existingBuf)
+ epilogue.create<DeallocOp>(loc, fastMemRef);
+
// Replace all uses of the old memref with the faster one while remapping
// access indices (subtracting out lower bound offsets for each dimension).
// Ex: to replace load %A[%i, %j] with load %Abuf[%i - %iT, %j - %jT],
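The *nEnd bookkeeping in this hunk keeps the caller's half-open instruction range pointing only at the original ops while DMA ops and deallocs are appended past its end. A small sketch of that pattern, with a hypothetical helper name around the Block::iterator construction used in the diff:

#include "mlir/IR/Block.h"

using namespace mlir;

// Sketch (hypothetical helper): after appending 'firstAppended' past the
// end of the tracked range [nBegin, nEnd), pull the end iterator back to
// the first appended instruction so the range still denotes only the
// original instructions.
static void adjustRangeEnd(Block::iterator *nEnd, Block::iterator end,
                           Instruction *firstAppended) {
  // Only adjust when the tracked range previously reached the block's
  // insertion point; otherwise it already excludes the appended ops.
  if (*nEnd == end)
    *nEnd = Block::iterator(firstAppended);
}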
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index cfa045f2279..5c2e38205e7 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -124,8 +124,9 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
// replaceAllMemRefUsesWith will always succeed unless the forOp body has
// non-dereferencing uses of the memref.
- if (!replaceAllMemRefUsesWith(oldMemRef, newMemRef, {ivModTwoOp}, AffineMap(),
- {}, &*forOp->getBody()->begin())) {
+ if (!replaceAllMemRefUsesWith(
+ oldMemRef, newMemRef, {ivModTwoOp}, AffineMap(), {},
+ /*domInstFilter=*/&*forOp->getBody()->begin())) {
LLVM_DEBUG(llvm::dbgs()
<< "memref replacement for double buffering failed\n";);
ivModTwoOp->getInstruction()->erase();
@@ -284,10 +285,20 @@ PipelineDataTransfer::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
// If the old memref has no more uses, remove its 'dead' alloc if it was
// alloc'ed. (note: DMA buffers are rarely function live-in; but a 'dim'
// operation could have been used on it if it was dynamically shaped in
- // order to create the double buffer above)
- if (oldMemRef->use_empty())
- if (auto *allocInst = oldMemRef->getDefiningInst())
+ // order to create the double buffer above.)
+ // '-canonicalize' does this in a more general way, but we'll do the
+ // simple/common case here anyway so that the output / test cases look clear.
+ if (auto *allocInst = oldMemRef->getDefiningInst()) {
+ if (oldMemRef->use_empty()) {
allocInst->erase();
+ } else if (oldMemRef->hasOneUse()) {
+ auto *singleUse = oldMemRef->use_begin()->getOwner();
+ if (singleUse->isa<DeallocOp>()) {
+ singleUse->erase();
+ oldMemRef->getDefiningInst()->erase();
+ }
+ }
+ }
}
// Double the buffers for tag memrefs.
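The cleanup added in this hunk can be read as a standalone rule: an alloc whose memref is unused, or whose only remaining use is a dealloc, is dead and can be erased together with that dealloc. A distilled sketch reusing the calls from the hunk (the helper name is hypothetical, and '-canonicalize' subsumes this in general):

// Include paths assumed for the MLIR tree at this revision.
#include "mlir/IR/Value.h"
#include "mlir/StandardOps/StandardOps.h"

using namespace mlir;

// Sketch: erase a dead alloc, and its sole dealloc if that is the only use.
static void eraseDeadAlloc(Value *memref) {
  auto *allocInst = memref->getDefiningInst();
  if (!allocInst)
    return; // Not produced by an instruction (e.g. function live-in).
  if (memref->use_empty()) {
    allocInst->erase();
  } else if (memref->hasOneUse()) {
    auto *singleUse = memref->use_begin()->getOwner();
    // An alloc whose only use is its own dealloc is dead as a pair.
    if (singleUse->isa<DeallocOp>()) {
      singleUse->erase();
      allocInst->erase();
    }
  }
}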
diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp
index 41689be52fc..519885b3a50 100644
--- a/mlir/lib/Transforms/Utils/Utils.cpp
+++ b/mlir/lib/Transforms/Utils/Utils.cpp
@@ -91,6 +91,11 @@ bool mlir::replaceAllMemRefUsesWith(const Value *oldMemRef, Value *newMemRef,
!postDomInfo->postDominates(postDomInstFilter, opInst))
continue;
+ // Skip dealloc's - no replacement is necessary, and a replacement doesn't
+ // hurt dealloc's.
+ if (opInst->isa<DeallocOp>())
+ continue;
+
// Check if the memref was used in a non-dereferencing context. It is fine for
// the memref to be used in a non-dereferencing way outside of the region
// where this replacement is happening.
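With the early continue added above, replaceAllMemRefUsesWith leaves deallocs of the old memref untouched: they need no index remapping, and keeping them attached to the old memref lets callers such as the PipelineDataTransfer change above erase the dead alloc/dealloc pair afterwards. A simplified sketch of the walk (the function name and the pre-collected 'uses' list are assumptions; dominance filtering and the actual operand rewrite are elided):

// Include paths assumed for the MLIR tree at this revision.
#include "llvm/ADT/ArrayRef.h"
#include "mlir/IR/Instruction.h"
#include "mlir/StandardOps/StandardOps.h"

using namespace mlir;

// Sketch: iterate the collected using instructions, skipping deallocs.
static void replaceUsesSkippingDeallocs(llvm::ArrayRef<Instruction *> uses) {
  for (auto *opInst : uses) {
    // Deallocs need no replacement; leave them on the old memref so the
    // caller can erase a now-dead alloc/dealloc pair.
    if (opInst->isa<DeallocOp>())
      continue;
    // ... verify the use is dereferencing, then rewrite the memref
    // operand with remapped access indices ...
  }
}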