summaryrefslogtreecommitdiffstats
path: root/mlir/lib/IR/Operation.cpp
diff options
context:
space:
mode:
authorRiver Riddle <riverriddle@google.com>2020-01-02 14:28:37 -0800
committerRiver Riddle <riverriddle@google.com>2020-01-02 14:40:09 -0800
commitfd01d8626cdcce9f34caab060f8d3fd35f6661cc (patch)
tree10181d726e607c5eaff7ff3428f2d6b9f2dd5eeb /mlir/lib/IR/Operation.cpp
parent1c45852c828dae0dd15136cda3d7fd6af0f75dc7 (diff)
downloadbcm5719-llvm-fd01d8626cdcce9f34caab060f8d3fd35f6661cc.tar.gz
bcm5719-llvm-fd01d8626cdcce9f34caab060f8d3fd35f6661cc.zip
[mlir] Rewrite the internal representation of OpResult to be optimized for memory.
Summary: This changes the implementation of OpResult so that some of the results are represented inline in Value, via a pointer-int pair of Operation* + result number, and the rest are stored as trailing objects on the main operation. The full details of the new representation are given in the proposal here: https://groups.google.com/a/tensorflow.org/g/mlir/c/XXzzKhqqF_0/m/v6bKb08WCgAJ. The only difference between this change and the above proposal is that we only steal 2 bits for the Value kind instead of 3. This means that we can only fit 2 results inline instead of 6. This allows other users to steal the final bit for PointerUnion/etc. If necessary, we can always steal this bit back in the future to save more space if 3-6 results are common enough. Reviewed By: jpienaar. Differential Revision: https://reviews.llvm.org/D72020
Diffstat (limited to 'mlir/lib/IR/Operation.cpp')
-rw-r--r--mlir/lib/IR/Operation.cpp71
1 files changed, 41 insertions, 30 deletions
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index fd9c40f597e..6cf7eea6b4f 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -64,17 +64,6 @@ OperationName OperationName::getFromOpaquePointer(void *pointer) {
}
//===----------------------------------------------------------------------===//
-// OpResult
-//===----------------------------------------------------------------------===//
-
-/// Return the result number of this result.
-unsigned OpResult::getResultNumber() const {
- // Results are not stored in place, so we have to find it within the list.
- auto resList = getOwner()->getOpResults();
- return std::distance(resList.begin(), llvm::find(resList, *this));
-}
-
-//===----------------------------------------------------------------------===//
// Operation
//===----------------------------------------------------------------------===//
@@ -124,27 +113,41 @@ Operation *Operation::create(Location location, OperationName name,
bool resizableOperandList) {
unsigned numSuccessors = successors.size();
+ // We only need to allocate additional memory for a subset of results.
+ unsigned numTrailingResults = OpResult::getNumTrailing(resultTypes.size());
+
// Input operands are nullptr-separated for each successor, the null operands
// aren't actually stored.
unsigned numOperands = operands.size() - numSuccessors;
// Compute the byte size for the operation and the operand storage.
- auto byteSize =
- totalSizeToAlloc<OpResult, BlockOperand, Region, detail::OperandStorage>(
- resultTypes.size(), numSuccessors, numRegions,
- /*detail::OperandStorage*/ 1);
+ auto byteSize = totalSizeToAlloc<detail::TrailingOpResult, BlockOperand,
+ Region, detail::OperandStorage>(
+ numTrailingResults, numSuccessors, numRegions,
+ /*detail::OperandStorage*/ 1);
byteSize += llvm::alignTo(detail::OperandStorage::additionalAllocSize(
numOperands, resizableOperandList),
alignof(Operation));
void *rawMem = malloc(byteSize);
// Create the new Operation.
- auto op = ::new (rawMem) Operation(location, name, resultTypes.size(),
- numSuccessors, numRegions, attributes);
+ auto op = ::new (rawMem) Operation(location, name, resultTypes, numSuccessors,
+ numRegions, attributes);
assert((numSuccessors == 0 || !op->isKnownNonTerminator()) &&
"unexpected successors in a non-terminator operation");
+ // Initialize the trailing results.
+ if (LLVM_UNLIKELY(numTrailingResults > 0)) {
+ // We initialize the trailing results with their result number. This makes
+ // 'getResultNumber' checks much more efficient. The main purpose for these
+ // results is to give an anchor to the main operation anyways, so this is
+ // purely an optimization.
+ auto *trailingResultIt = op->getTrailingObjects<detail::TrailingOpResult>();
+ for (unsigned i = 0; i != numTrailingResults; ++i, ++trailingResultIt)
+ trailingResultIt->trailingResultNumber = i;
+ }
+
// Initialize the regions.
for (unsigned i = 0; i != numRegions; ++i)
new (&op->getRegion(i)) Region(op);
@@ -152,11 +155,6 @@ Operation *Operation::create(Location location, OperationName name,
// Initialize the results and operands.
new (&op->getOperandStorage())
detail::OperandStorage(numOperands, resizableOperandList);
-
- auto instResults = op->getOpResults();
- for (unsigned i = 0, e = resultTypes.size(); i != e; ++i)
- new (&instResults[i]) OpResult(OpResult::create(resultTypes[i], op));
-
auto opOperands = op->getOpOperands();
// Initialize normal operands.
@@ -208,11 +206,20 @@ Operation *Operation::create(Location location, OperationName name,
return op;
}
-Operation::Operation(Location location, OperationName name, unsigned numResults,
- unsigned numSuccessors, unsigned numRegions,
- const NamedAttributeList &attributes)
- : location(location), numResults(numResults), numSuccs(numSuccessors),
- numRegions(numRegions), name(name), attrs(attributes) {}
+Operation::Operation(Location location, OperationName name,
+ ArrayRef<Type> resultTypes, unsigned numSuccessors,
+ unsigned numRegions, const NamedAttributeList &attributes)
+ : location(location), numSuccs(numSuccessors), numRegions(numRegions),
+ hasSingleResult(false), name(name), attrs(attributes) {
+ if (!resultTypes.empty()) {
+ // If there is a single result it is stored in-place, otherwise use a tuple.
+ hasSingleResult = resultTypes.size() == 1;
+ if (hasSingleResult)
+ resultType = resultTypes.front();
+ else
+ resultType = TupleType::get(resultTypes, location->getContext());
+ }
+}
// Operations are deleted through the destroy() member because they are
// allocated via malloc.
@@ -222,9 +229,6 @@ Operation::~Operation() {
// Explicitly run the destructors for the operands and results.
getOperandStorage().~OperandStorage();
- for (auto &result : getOpResults())
- result.destroy();
-
// Explicitly run the destructors for the successors.
for (auto &successor : getBlockOperands())
successor.~BlockOperand();
@@ -540,6 +544,13 @@ void Operation::dropAllDefinedValueUses() {
block.dropAllDefinedValueUses();
}
+/// Return the number of results held by this operation.
+unsigned Operation::getNumResults() {
+ if (!resultType)
+ return 0;
+ return hasSingleResult ? 1 : resultType.cast<TupleType>().size();
+}
+
void Operation::setSuccessor(Block *block, unsigned index) {
assert(index < getNumSuccessors());
getBlockOperands()[index].set(block);
OpenPOWER on IntegriCloud