path: root/polly/lib/Transform/Simplify.cpp
author     Michael Kruse <llvm@meinersbur.de>    2017-07-29 16:21:16 +0000
committer  Michael Kruse <llvm@meinersbur.de>    2017-07-29 16:21:16 +0000
commit     ce9617f4fe9da8a84c3e16061b4b485e94c446de (patch)
tree       fa98be36ff4744096ffcbed14471442a2a23f005 /polly/lib/Transform/Simplify.cpp
parent     c14865c0c5e3fb3625954f92b5b207ff95376e01 (diff)
[Simplify] Implement write accesses coalescing.
Write coalescing combines write accesses that

- Write the same llvm::Value.
- Write to the same array.
- Unless they do not write anything in a statement instance (partial writes), write to the same element.
- Have no other access between them that accesses the same element.

This is particularly useful after DeLICM, which leaves partial writes to disjoint domains.

Differential Revision: https://reviews.llvm.org/D36010

llvm-svn: 309489
Diffstat (limited to 'polly/lib/Transform/Simplify.cpp')
-rw-r--r--  polly/lib/Transform/Simplify.cpp  185
1 file changed, 182 insertions(+), 3 deletions(-)
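
To make the transformation concrete before reading the diff, here is a minimal standalone sketch at the access-relation level, using isl's C API. The statement name Stmt, the array A, and the loop bounds are made-up illustrations, not values taken from the patch or produced by Polly. It shows the situation DeLICM typically leaves behind: two must-writes of the same value whose domains are disjoint, which this patch merges into a single access covering the union of both domains.

/* Sketch: two partial writes of the same value with disjoint domains are
 * replaced by one access whose relation is the union of both relations. */
#include <stdio.h>
#include <stdlib.h>
#include <isl/ctx.h>
#include <isl/map.h>

int main(void) {
  isl_ctx *Ctx = isl_ctx_alloc();

  /* Partial write covering the first half of the statement's domain ... */
  isl_map *Write1 = isl_map_read_from_str(
      Ctx, "{ Stmt[i] -> A[i] : 0 <= i < 50 }");
  /* ... and a second partial write of the same llvm::Value for the rest. */
  isl_map *Write2 = isl_map_read_from_str(
      Ctx, "{ Stmt[i] -> A[i] : 50 <= i < 100 }");

  /* The coalesced access relation that replaces both accesses. */
  isl_map *Merged = isl_map_coalesce(isl_map_union(Write1, Write2));

  char *Str = isl_map_to_str(Merged);
  printf("%s\n", Str); /* covers the full domain: 0 <= i < 100 */
  free(Str);

  isl_map_free(Merged);
  isl_ctx_free(Ctx);
  return 0;
}
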
diff --git a/polly/lib/Transform/Simplify.cpp b/polly/lib/Transform/Simplify.cpp
index 24e4e8a9ad2..8322e6e2562 100644
--- a/polly/lib/Transform/Simplify.cpp
+++ b/polly/lib/Transform/Simplify.cpp
@@ -16,6 +16,7 @@
#include "polly/ScopPass.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
+#include "polly/Support/ISLTools.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
@@ -34,6 +35,7 @@ STATISTIC(PairUnequalAccRels, "Number of Load-Store pairs NOT removed because "
STATISTIC(InBetweenStore, "Number of Load-Store pairs NOT removed because "
"there is another store between them");
STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes");
+STATISTIC(TotalWritesCoalesced, "Number of writes coalesced with another");
STATISTIC(TotalRedundantWritesRemoved,
"Number of writes of same value removed in any SCoP");
STATISTIC(TotalEmptyPartialAccessesRemoved,
@@ -96,6 +98,9 @@ private:
/// Number of writes that are overwritten anyway.
int OverwritesRemoved = 0;
+ /// Number of combined writes.
+ int WritesCoalesced = 0;
+
/// Number of redundant writes removed from this SCoP.
int RedundantWritesRemoved = 0;
@@ -113,9 +118,10 @@ private:
/// Return whether at least one simplification has been applied.
bool isModified() const {
- return OverwritesRemoved > 0 || RedundantWritesRemoved > 0 ||
- EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 ||
- DeadInstructionsRemoved > 0 || StmtsRemoved > 0;
+ return OverwritesRemoved > 0 || WritesCoalesced > 0 ||
+ RedundantWritesRemoved > 0 || EmptyPartialAccessesRemoved > 0 ||
+ DeadAccessesRemoved > 0 || DeadInstructionsRemoved > 0 ||
+ StmtsRemoved > 0;
}
MemoryAccess *getReadAccessForValue(ScopStmt *Stmt, llvm::Value *Val) {
@@ -239,6 +245,173 @@ private:
}
}
+ /// Combine writes that write the same value if possible.
+ ///
+ /// This function is able to combine:
+ /// - Partial writes with disjoint domain.
+ /// - Writes that write to the same array element.
+ ///
+ /// In all cases, both writes must write the same values.
+ void coalesceWrites() {
+ for (auto &Stmt : *S) {
+ isl::set Domain =
+ give(Stmt.getDomain()).intersect_params(give(S->getContext()));
+
+ // We let isl do the lookup for the same-value condition. For this, we
+ // wrap llvm::Value into an isl::set such that isl can do the lookup in
+ // its hashtable implementation. llvm::Values are only compared within a
+ // ScopStmt, so the map can be local to this scope. TODO: Refactor with
+ // ZoneAlgorithm::makeValueSet()
+ SmallDenseMap<Value *, isl::set> ValueSets;
+ auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
+ assert(V);
+ isl::set &Result = ValueSets[V];
+ if (Result.is_null()) {
+ isl_ctx *Ctx = S->getIslCtx();
+ std::string Name =
+ getIslCompatibleName("Val", V, ValueSets.size() - 1,
+ std::string(), UseInstructionNames);
+ isl::id Id = give(isl_id_alloc(Ctx, Name.c_str(), V));
+ Result = isl::set::universe(
+ isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
+ }
+ return Result;
+ };
+
+ // List of all eligible (for coalescing) writes of the future.
+ // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
+ isl::union_map FutureWrites =
+ isl::union_map::empty(give(S->getParamSpace()));
+
+ // Iterate over accesses from the last to the first.
+ SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
+ for (MemoryAccess *MA : reverse(Accesses)) {
+ // In region statements, the explicit accesses can be in blocks that can
+ // be executed in any order. We therefore process only the implicit
+ // writes and stop after that.
+ if (Stmt.isRegionStmt() && isExplicitAccess(MA))
+ break;
+
+ // { Domain[] -> Element[] }
+ isl::map AccRel =
+ MA->getLatestAccessRelation().intersect_domain(Domain);
+
+ // { [Domain[] -> Element[]] }
+ isl::set AccRelWrapped = AccRel.wrap();
+
+ // { Value[] }
+ isl::set ValSet;
+
+ if (MA->isMustWrite() && (MA->isOriginalScalarKind() ||
+ isa<StoreInst>(MA->getAccessInstruction()))) {
+ // Normally, tryGetValueStored() should be used to determine which
+ // value is written, but it can return nullptr; for PHI accesses,
+ // getAccessValue() returns the PHI instead of the PHI's incoming
+ // value. In this case, where we only compare values within a single
+ // statement, this is fine, because within a statement a PHI in a
+ // successor block always has the same value as the incoming write. We
+ // still prefer to use the incoming value directly so we also catch
+ // direct uses of it.
+ Value *StoredVal = MA->tryGetValueStored();
+ if (!StoredVal)
+ StoredVal = MA->getAccessValue();
+ ValSet = makeValueSet(StoredVal);
+
+ // { Domain[] }
+ isl::set AccDomain = AccRel.domain();
+
+ // Parts of the statement's domain that are not written by this access.
+ isl::set UndefDomain = Domain.subtract(AccDomain);
+
+ // { Element[] }
+ isl::set ElementUniverse =
+ isl::set::universe(AccRel.get_space().range());
+
+ // { Domain[] -> Element[] }
+ isl::map UndefAnything =
+ isl::map::from_domain_and_range(UndefDomain, ElementUniverse);
+
+ // We are looking for a compatible write access. The other write can
+ // access these elements...
+ isl::map AllowedAccesses = AccRel.unite(UndefAnything);
+
+ // ... and must write the same value.
+ // { [Domain[] -> Element[]] -> Value[] }
+ isl::map Filter =
+ isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet);
+
+ // Lookup future write that fulfills these conditions.
+ // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] }
+ isl::union_map Filtered =
+ FutureWrites.uncurry().intersect_domain(Filter.wrap());
+
+ // Iterate through the candidates.
+ Filtered.foreach_map([&, this](isl::map Map) -> isl::stat {
+ MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space()
+ .get_tuple_id(isl::dim::out)
+ .get_user();
+
+ isl::map OtherAccRel =
+ OtherMA->getLatestAccessRelation().intersect_domain(Domain);
+
+ // The filter only guaranteed that some of OtherMA's accessed
+ // elements are allowed. Verify that it only accesses allowed
+ // elements. Otherwise, continue with the next candidate.
+ if (!OtherAccRel.is_subset(AllowedAccesses).is_true())
+ return isl::stat::ok;
+
+ // The combined access relation.
+ // { Domain[] -> Element[] }
+ isl::map NewAccRel = AccRel.unite(OtherAccRel);
+ simplify(NewAccRel);
+
+ // Carry out the coalescing.
+ Stmt.removeSingleMemoryAccess(MA);
+ OtherMA->setNewAccessRelation(NewAccRel.copy());
+
+ // We removed MA, OtherMA takes its role.
+ MA = OtherMA;
+
+ TotalWritesCoalesced++;
+ WritesCoalesced++;
+
+ // Don't look for more candidates.
+ return isl::stat::error;
+ });
+ }
+
+ // Two writes cannot be coalesced if there is another access (to some of
+ // the written elements) between them. Remove all visited write accesses
+ // from the list of eligible writes. Don't just remove the accessed
+ // elements, but any MemoryAccess that touches any of the invalidated
+ // elements.
+ // { MemoryAccess[] }
+ isl::union_set TouchedAccesses =
+ FutureWrites.intersect_domain(AccRelWrapped)
+ .range()
+ .unwrap()
+ .range();
+ FutureWrites =
+ FutureWrites.uncurry().subtract_range(TouchedAccesses).curry();
+
+ if (MA->isMustWrite() && !ValSet.is_null()) {
+ // { MemoryAccess[] }
+ auto AccSet =
+ isl::set::universe(isl::space(S->getIslCtx(), 0, 0)
+ .set_tuple_id(isl::dim::set, MA->getId()));
+
+ // { Val[] -> MemoryAccess[] }
+ isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet);
+
+ // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
+ isl::map AccRelValAcc =
+ isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap());
+ FutureWrites = FutureWrites.add_map(AccRelValAcc);
+ }
+ }
+ }
+ }
+
/// Remove writes that just write the same value already stored in the
/// element.
void removeRedundantWrites() {
@@ -413,6 +586,8 @@ private:
OS.indent(Indent) << "Statistics {\n";
OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved
<< '\n';
+ OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced
+ << "\n";
OS.indent(Indent + 4) << "Redundant writes removed: "
<< RedundantWritesRemoved << "\n";
OS.indent(Indent + 4) << "Accesses with empty domains removed: "
@@ -461,6 +636,9 @@ public:
DEBUG(dbgs() << "Removing overwrites...\n");
removeOverwrites();
+ DEBUG(dbgs() << "Coalesce partial writes...\n");
+ coalesceWrites();
+
DEBUG(dbgs() << "Removing redundant writes...\n");
removeRedundantWrites();
@@ -495,6 +673,7 @@ public:
S = nullptr;
OverwritesRemoved = 0;
+ WritesCoalesced = 0;
RedundantWritesRemoved = 0;
EmptyPartialAccessesRemoved = 0;
DeadAccessesRemoved = 0;
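
For reference, the candidate lookup that coalesceWrites() performs above, FutureWrites.uncurry().intersect_domain(Filter.wrap()), can be reproduced standalone with isl's C API. All tuple names (Stmt, A, Val, Acc1) and domains below are made-up illustrations, not values produced by Polly, and the patch itself uses Polly's C++ isl wrappers rather than these C calls; this is only a sketch of the lookup's set algebra.

#include <stdio.h>
#include <stdlib.h>
#include <isl/ctx.h>
#include <isl/map.h>
#include <isl/set.h>
#include <isl/union_map.h>
#include <isl/union_set.h>

int main(void) {
  isl_ctx *Ctx = isl_ctx_alloc();

  /* Already-visited (later in execution order) writes, keyed by written
     element and stored value:
     { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } */
  isl_union_map *FutureWrites = isl_union_map_read_from_str(
      Ctx, "{ [Stmt[i] -> A[i]] -> [Val[] -> Acc1[]] : 50 <= i < 100 }");

  /* The current (earlier) write stores Val to A[i] for i < 50. A compatible
     write may touch these elements, or anything where it writes nothing. */
  isl_map *AllowedAccesses = isl_map_read_from_str(
      Ctx, "{ Stmt[i] -> A[i] : 0 <= i < 50; Stmt[i] -> A[o] : 50 <= i < 100 }");
  isl_set *ValSet = isl_set_read_from_str(Ctx, "{ Val[] }");

  /* ... and it must store the same value:
     { [Domain[] -> Element[]] -> Value[] } */
  isl_map *Filter = isl_map_from_domain_and_range(
      isl_map_wrap(AllowedAccesses), ValSet);

  /* Keep only compatible candidates:
     { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] } */
  isl_union_map *Filtered = isl_union_map_intersect_domain(
      isl_union_map_uncurry(FutureWrites),
      isl_union_set_from_set(isl_map_wrap(Filter)));

  char *Str = isl_union_map_to_str(Filtered);
  printf("%s\n", Str); /* Acc1 survives, so it is a coalescing candidate */
  free(Str);

  isl_union_map_free(Filtered);
  isl_ctx_free(Ctx);
  return 0;
}

In the patch, the surviving MemoryAccess is additionally verified to access only allowed elements (is_subset of AllowedAccesses) before the two access relations are merged and the earlier write is removed.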