diff options
| author | River Riddle <riverriddle@google.com> | 2020-01-02 14:28:37 -0800 |
|---|---|---|
| committer | River Riddle <riverriddle@google.com> | 2020-01-02 14:40:09 -0800 |
| commit | fd01d8626cdcce9f34caab060f8d3fd35f6661cc (patch) | |
| tree | 10181d726e607c5eaff7ff3428f2d6b9f2dd5eeb /mlir/lib/IR/Operation.cpp | |
| parent | 1c45852c828dae0dd15136cda3d7fd6af0f75dc7 (diff) | |
| download | bcm5719-llvm-fd01d8626cdcce9f34caab060f8d3fd35f6661cc.tar.gz bcm5719-llvm-fd01d8626cdcce9f34caab060f8d3fd35f6661cc.zip | |
[mlir] Rewrite the internal representation of OpResult to be optimized for memory.
Summary:
This changes the implementation of OpResult so that some of the results are represented inline in Value, via a pointer-int pair of Operation* + result number, and the rest are trailing objects on the main operation. The full details of the new representation are described in the proposal here:
https://groups.google.com/a/tensorflow.org/g/mlir/c/XXzzKhqqF_0/m/v6bKb08WCgAJ
The only difference between this change and the above proposal is that we only steal 2 bits for the Value kind instead of 3. This means that we can only fit 2 results inline instead of 6. This allows other users to steal the final bit for PointerUnion/etc. If necessary, we can always steal this bit back in the future to save more space if 3-6 results are common enough.
Reviewed By: jpienaar
Differential Revision: https://reviews.llvm.org/D72020
Diffstat (limited to 'mlir/lib/IR/Operation.cpp')
| -rw-r--r-- | mlir/lib/IR/Operation.cpp | 71 |
1 files changed, 41 insertions, 30 deletions
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index fd9c40f597e..6cf7eea6b4f 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -64,17 +64,6 @@ OperationName OperationName::getFromOpaquePointer(void *pointer) { } //===----------------------------------------------------------------------===// -// OpResult -//===----------------------------------------------------------------------===// - -/// Return the result number of this result. -unsigned OpResult::getResultNumber() const { - // Results are not stored in place, so we have to find it within the list. - auto resList = getOwner()->getOpResults(); - return std::distance(resList.begin(), llvm::find(resList, *this)); -} - -//===----------------------------------------------------------------------===// // Operation //===----------------------------------------------------------------------===// @@ -124,27 +113,41 @@ Operation *Operation::create(Location location, OperationName name, bool resizableOperandList) { unsigned numSuccessors = successors.size(); + // We only need to allocate additional memory for a subset of results. + unsigned numTrailingResults = OpResult::getNumTrailing(resultTypes.size()); + // Input operands are nullptr-separated for each successor, the null operands // aren't actually stored. unsigned numOperands = operands.size() - numSuccessors; // Compute the byte size for the operation and the operand storage. 
- auto byteSize = - totalSizeToAlloc<OpResult, BlockOperand, Region, detail::OperandStorage>( - resultTypes.size(), numSuccessors, numRegions, - /*detail::OperandStorage*/ 1); + auto byteSize = totalSizeToAlloc<detail::TrailingOpResult, BlockOperand, + Region, detail::OperandStorage>( + numTrailingResults, numSuccessors, numRegions, + /*detail::OperandStorage*/ 1); byteSize += llvm::alignTo(detail::OperandStorage::additionalAllocSize( numOperands, resizableOperandList), alignof(Operation)); void *rawMem = malloc(byteSize); // Create the new Operation. - auto op = ::new (rawMem) Operation(location, name, resultTypes.size(), - numSuccessors, numRegions, attributes); + auto op = ::new (rawMem) Operation(location, name, resultTypes, numSuccessors, + numRegions, attributes); assert((numSuccessors == 0 || !op->isKnownNonTerminator()) && "unexpected successors in a non-terminator operation"); + // Initialize the trailing results. + if (LLVM_UNLIKELY(numTrailingResults > 0)) { + // We initialize the trailing results with their result number. This makes + // 'getResultNumber' checks much more efficient. The main purpose for these + // results is to give an anchor to the main operation anyways, so this is + // purely an optimization. + auto *trailingResultIt = op->getTrailingObjects<detail::TrailingOpResult>(); + for (unsigned i = 0; i != numTrailingResults; ++i, ++trailingResultIt) + trailingResultIt->trailingResultNumber = i; + } + // Initialize the regions. for (unsigned i = 0; i != numRegions; ++i) new (&op->getRegion(i)) Region(op); @@ -152,11 +155,6 @@ Operation *Operation::create(Location location, OperationName name, // Initialize the results and operands. 
new (&op->getOperandStorage()) detail::OperandStorage(numOperands, resizableOperandList); - - auto instResults = op->getOpResults(); - for (unsigned i = 0, e = resultTypes.size(); i != e; ++i) - new (&instResults[i]) OpResult(OpResult::create(resultTypes[i], op)); - auto opOperands = op->getOpOperands(); // Initialize normal operands. @@ -208,11 +206,20 @@ Operation *Operation::create(Location location, OperationName name, return op; } -Operation::Operation(Location location, OperationName name, unsigned numResults, - unsigned numSuccessors, unsigned numRegions, - const NamedAttributeList &attributes) - : location(location), numResults(numResults), numSuccs(numSuccessors), - numRegions(numRegions), name(name), attrs(attributes) {} +Operation::Operation(Location location, OperationName name, + ArrayRef<Type> resultTypes, unsigned numSuccessors, + unsigned numRegions, const NamedAttributeList &attributes) + : location(location), numSuccs(numSuccessors), numRegions(numRegions), + hasSingleResult(false), name(name), attrs(attributes) { + if (!resultTypes.empty()) { + // If there is a single result it is stored in-place, otherwise use a tuple. + hasSingleResult = resultTypes.size() == 1; + if (hasSingleResult) + resultType = resultTypes.front(); + else + resultType = TupleType::get(resultTypes, location->getContext()); + } +} // Operations are deleted through the destroy() member because they are // allocated via malloc. @@ -222,9 +229,6 @@ Operation::~Operation() { // Explicitly run the destructors for the operands and results. getOperandStorage().~OperandStorage(); - for (auto &result : getOpResults()) - result.destroy(); - // Explicitly run the destructors for the successors. for (auto &successor : getBlockOperands()) successor.~BlockOperand(); @@ -540,6 +544,13 @@ void Operation::dropAllDefinedValueUses() { block.dropAllDefinedValueUses(); } +/// Return the number of results held by this operation. 
+unsigned Operation::getNumResults() { + if (!resultType) + return 0; + return hasSingleResult ? 1 : resultType.cast<TupleType>().size(); +} + void Operation::setSuccessor(Block *block, unsigned index) { assert(index < getNumSuccessors()); getBlockOperands()[index].set(block); |

