summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp284
1 files changed, 278 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index a9c17fdeb42..5e6850d4f59 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,6 +14,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
@@ -56,6 +57,12 @@ static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
cl::init(false));
+// Cost threshold measuring when it is profitable to rematerialize value instead
+// of relocating it
+static cl::opt<unsigned>
+RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
+ cl::init(6));
+
#ifdef XDEBUG
static bool ClobberNonLive = true;
#else
@@ -78,6 +85,7 @@ struct RewriteStatepointsForGC : public FunctionPass {
// We add and rewrite a bunch of instructions, but don't really do much
// else. We could in theory preserve a lot more analyses here.
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
};
} // namespace
@@ -123,6 +131,7 @@ struct GCPtrLivenessData {
// types, then update all the second type to the first type
typedef DenseMap<Value *, Value *> DefiningValueMapTy;
typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
+typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
struct PartiallyConstructedSafepointRecord {
/// The set of values known to be live accross this safepoint
@@ -138,6 +147,11 @@ struct PartiallyConstructedSafepointRecord {
/// Instruction to which exceptional gc relocates are attached
/// Makes it easier to iterate through them during relocationViaAlloca.
Instruction *UnwindToken;
+
+ /// Record live values we are rematerialized instead of relocating.
+ /// They are not included into 'liveset' field.
+ /// Maps rematerialized copy to it's original value.
+ RematerializedValueMapTy RematerializedValues;
};
}
@@ -1389,6 +1403,31 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
}
}
+// Helper function for the "relocationViaAlloca". Similar to the
+// "insertRelocationStores" but works for rematerialized values.
+static void
+insertRematerializationStores(
+ RematerializedValueMapTy RematerializedValues,
+ DenseMap<Value *, Value *> &AllocaMap,
+ DenseSet<Value *> &VisitedLiveValues) {
+
+ for (auto RematerializedValuePair: RematerializedValues) {
+ Instruction *RematerializedValue = RematerializedValuePair.first;
+ Value *OriginalValue = RematerializedValuePair.second;
+
+ assert(AllocaMap.count(OriginalValue) &&
+ "Can not find alloca for rematerialized value");
+ Value *Alloca = AllocaMap[OriginalValue];
+
+ StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
+ Store->insertAfter(RematerializedValue);
+
+#ifndef NDEBUG
+ VisitedLiveValues.insert(OriginalValue);
+#endif
+ }
+}
+
/// do all the relocation update via allocas and mem2reg
static void relocationViaAlloca(
Function &F, DominatorTree &DT, ArrayRef<Value *> live,
@@ -1406,17 +1445,38 @@ static void relocationViaAlloca(
// TODO-PERF: change data structures, reserve
DenseMap<Value *, Value *> allocaMap;
SmallVector<AllocaInst *, 200> PromotableAllocas;
+ // Used later to chack that we have enough allocas to store all values
+ std::size_t NumRematerializedValues = 0;
PromotableAllocas.reserve(live.size());
+ // Emit alloca for "LiveValue" and record it in "allocaMap" and
+ // "PromotableAllocas"
+ auto emitAllocaFor = [&](Value *LiveValue) {
+ AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
+ F.getEntryBlock().getFirstNonPHI());
+ allocaMap[LiveValue] = Alloca;
+ PromotableAllocas.push_back(Alloca);
+ };
+
// emit alloca for each live gc pointer
for (unsigned i = 0; i < live.size(); i++) {
- Value *liveValue = live[i];
- AllocaInst *alloca = new AllocaInst(liveValue->getType(), "",
- F.getEntryBlock().getFirstNonPHI());
- allocaMap[liveValue] = alloca;
- PromotableAllocas.push_back(alloca);
+ emitAllocaFor(live[i]);
}
+ // emit allocas for rematerialized values
+ for (size_t i = 0; i < records.size(); i++) {
+ const struct PartiallyConstructedSafepointRecord &Info = records[i];
+
+ for (auto RematerializedValuePair: Info.RematerializedValues) {
+ Value *OriginalValue = RematerializedValuePair.second;
+ if (allocaMap.count(OriginalValue) != 0)
+ continue;
+
+ emitAllocaFor(OriginalValue);
+ ++NumRematerializedValues;
+ }
+ }
+
// The next two loops are part of the same conceptual operation. We need to
// insert a store to the alloca after the original def and at each
// redefinition. We need to insert a load before each use. These are split
@@ -1444,6 +1504,10 @@ static void relocationViaAlloca(
visitedLiveValues);
}
+ // Do similar thing with rematerialized values
+ insertRematerializationStores(info.RematerializedValues, allocaMap,
+ visitedLiveValues);
+
if (ClobberNonLive) {
// As a debuging aid, pretend that an unrelocated pointer becomes null at
// the gc.statepoint. This will turn some subtle GC problems into
@@ -1548,7 +1612,7 @@ static void relocationViaAlloca(
}
}
- assert(PromotableAllocas.size() == live.size() &&
+ assert(PromotableAllocas.size() == live.size() + NumRematerializedValues &&
"we must have the same allocas with lives");
if (!PromotableAllocas.empty()) {
// apply mem2reg to promote alloca to SSA
@@ -1732,6 +1796,201 @@ static void splitVectorValues(Instruction *StatepointInst,
PromoteMemToReg(Allocas, DT);
}
+// Helper function for the "rematerializeLiveValues". It walks use chain
+// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
+// values are visited (currently it is GEP's and casts). Returns true if it
+// sucessfully reached "BaseValue" and false otherwise.
+// Fills "ChainToBase" array with all visited values. "BaseValue" is not
+// recorded.
+static bool findRematerializableChainToBasePointer(
+ SmallVectorImpl<Instruction*> &ChainToBase,
+ Value *CurrentValue, Value *BaseValue) {
+
+ // We have found a base value
+ if (CurrentValue == BaseValue) {
+ return true;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
+ ChainToBase.push_back(GEP);
+ return findRematerializableChainToBasePointer(ChainToBase,
+ GEP->getPointerOperand(),
+ BaseValue);
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
+ Value *Def = CI->stripPointerCasts();
+
+ // This two checks are basically similar. First one is here for the
+ // consistency with findBasePointers logic.
+ assert(!isa<CastInst>(Def) && "not a pointer cast found");
+ if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
+ return false;
+
+ ChainToBase.push_back(CI);
+ return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+ }
+
+ // Not supported instruction in the chain
+ return false;
+}
+
+// Helper function for the "rematerializeLiveValues". Compute cost of the use
+// chain we are going to rematerialize.
+static unsigned
+chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
+ TargetTransformInfo &TTI) {
+ unsigned Cost = 0;
+
+ for (Instruction *Instr : Chain) {
+ if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
+ assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
+ "non noop cast is found during rematerialization");
+
+ Type *SrcTy = CI->getOperand(0)->getType();
+ Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
+
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
+ // Cost of the address calculation
+ Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
+ Cost += TTI.getAddressComputationCost(ValTy);
+
+ // And cost of the GEP itself
+ // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
+ // allowed for the external usage)
+ if (!GEP->hasAllConstantIndices())
+ Cost += 2;
+
+ } else {
+ llvm_unreachable("unsupported instruciton type during rematerialization");
+ }
+ }
+
+ return Cost;
+}
+
+// From the statepoint liveset pick values that are cheaper to recompute then to
+// relocate. Remove this values from the liveset, rematerialize them after
+// statepoint and record them in "Info" structure. Note that similar to
+// relocated values we don't do any user adjustments here.
+static void rematerializeLiveValues(CallSite CS,
+ PartiallyConstructedSafepointRecord &Info,
+ TargetTransformInfo &TTI) {
+ const int ChainLengthThreshold = 10;
+
+ // Record values we are going to delete from this statepoint live set.
+ // We can not di this in following loop due to iterator invalidation.
+ SmallVector<Value *, 32> LiveValuesToBeDeleted;
+
+ for (Value *LiveValue: Info.liveset) {
+ // For each live pointer find it's defining chain
+ SmallVector<Instruction *, 3> ChainToBase;
+ assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+ bool FoundChain =
+ findRematerializableChainToBasePointer(ChainToBase,
+ LiveValue,
+ Info.PointerToBase[LiveValue]);
+ // Nothing to do, or chain is too long
+ if (!FoundChain ||
+ ChainToBase.size() == 0 ||
+ ChainToBase.size() > ChainLengthThreshold)
+ continue;
+
+ // Compute cost of this chain
+ unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
+ // TODO: We can also account for cases when we will be able to remove some
+ // of the rematerialized values by later optimization passes. I.e if
+ // we rematerialized several intersecting chains. Or if original values
+ // don't have any uses besides this statepoint.
+
+ // For invokes we need to rematerialize each chain twice - for normal and
+ // for unwind basic blocks. Model this by multiplying cost by two.
+ if (CS.isInvoke()) {
+ Cost *= 2;
+ }
+ // If it's too expensive - skip it
+ if (Cost >= RematerializationThreshold)
+ continue;
+
+ // Remove value from the live set
+ LiveValuesToBeDeleted.push_back(LiveValue);
+
+ // Clone instructions and record them inside "Info" structure
+
+ // Walk backwards to visit top-most instructions first
+ std::reverse(ChainToBase.begin(), ChainToBase.end());
+
+ // Utility function which clones all instructions from "ChainToBase"
+ // and inserts them before "InsertBefore". Returns rematerialized value
+ // which should be used after statepoint.
+ auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
+ Instruction *LastClonedValue = nullptr;
+ Instruction *LastValue = nullptr;
+ for (Instruction *Instr: ChainToBase) {
+ // Only GEP's and casts are suported as we need to be careful to not
+ // introduce any new uses of pointers not in the liveset.
+ // Note that it's fine to introduce new uses of pointers which were
+ // otherwise not used after this statepoint.
+ assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
+
+ Instruction *ClonedValue = Instr->clone();
+ ClonedValue->insertBefore(InsertBefore);
+ ClonedValue->setName(Instr->getName() + ".remat");
+
+ // If it is not first instruction in the chain then it uses previously
+ // cloned value. We should update it to use cloned value.
+ if (LastClonedValue) {
+ assert(LastValue);
+ ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
+#ifndef NDEBUG
+ // Assert that cloned instruction does not use any instructions
+ // other than LastClonedValue
+ for (auto OpValue: ClonedValue->operand_values()) {
+ if (isa<Instruction>(OpValue))
+ assert(OpValue == LastClonedValue &&
+ "unexpected use found in rematerialized value");
+ }
+#endif
+ }
+
+ LastClonedValue = ClonedValue;
+ LastValue = Instr;
+ }
+ assert(LastClonedValue);
+ return LastClonedValue;
+ };
+
+ // Different cases for calls and invokes. For invokes we need to clone
+ // instructions both on normal and unwind path.
+ if (CS.isCall()) {
+ Instruction *InsertBefore = CS.getInstruction()->getNextNode();
+ assert(InsertBefore);
+ Instruction *RematerializedValue = rematerializeChain(InsertBefore);
+ Info.RematerializedValues[RematerializedValue] = LiveValue;
+ } else {
+ InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
+
+ Instruction *NormalInsertBefore =
+ Invoke->getNormalDest()->getFirstInsertionPt();
+ Instruction *UnwindInsertBefore =
+ Invoke->getUnwindDest()->getFirstInsertionPt();
+
+ Instruction *NormalRematerializedValue =
+ rematerializeChain(NormalInsertBefore);
+ Instruction *UnwindRematerializedValue =
+ rematerializeChain(UnwindInsertBefore);
+
+ Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
+ Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
+ }
+ }
+
+ // Remove rematerializaed values from the live set
+ for (auto LiveValue: LiveValuesToBeDeleted) {
+ Info.liveset.erase(LiveValue);
+ }
+}
+
static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
SmallVectorImpl<CallSite> &toUpdate) {
#ifndef NDEBUG
@@ -1867,6 +2126,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
}
holders.clear();
+ // In order to reduce live set of statepoint we might choose to rematerialize
+ // some values instead of relocating them. This is purelly an optimization and
+ // does not influence correctness.
+ TargetTransformInfo &TTI =
+ P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+
+ rematerializeLiveValues(CS, info, TTI);
+ }
+
// Now run through and replace the existing statepoints with new ones with
// the live variables listed. We do not yet update uses of the values being
// relocated. We have references to live variables that need to
OpenPOWER on IntegriCloud