Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp  81
1 file changed, 54 insertions, 27 deletions
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index ef91effe3ff..1653859e91c 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -735,16 +735,10 @@ private:
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
uint64_t Size, bool IsVolatile) {
- // We allow splitting of loads and stores where the type is an integer type
- // and cover the entire alloca. This prevents us from splitting over
- // eagerly.
- // FIXME: In the great blue eventually, we should eagerly split all integer
- // loads and stores, and then have a separate step that merges adjacent
- // alloca partitions into a single partition suitable for integer widening.
- // Or we should skip the merge step and rely on GVN and other passes to
- // merge adjacent loads and stores that survive mem2reg.
- bool IsSplittable =
- Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+ // We allow splitting of non-volatile loads and stores where the type is an
+ // integer type. These may be used to implement 'memcpy' or other "transfer
+ // of bits" patterns.
+ bool IsSplittable = Ty->isIntegerTy() && !IsVolatile;
insertUse(I, Offset, Size, IsSplittable);
}
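
To make the new predicate concrete, here is a hedged sketch (the struct, function, and memcpy-based lowering are invented for illustration, not taken from the patch) of the kind of "transfer of bits" pattern the comment refers to: an aggregate copied through a wide integer that an optimizer may lower to a single i64 load feeding a single i64 store. Under the old predicate such an access was only splittable when it started at offset 0 and covered the whole alloca; the new predicate allows SROA to consider splitting it to match the underlying field slices.

    #include <cstdint>
    #include <cstring>

    // Illustrative only: a small aggregate whose bytes are moved through one
    // wide integer, i.e. a "transfer of bits" that ignores the field types.
    struct Pair {
      float X;
      int Y;
    };
    static_assert(sizeof(Pair) == sizeof(uint64_t), "example assumes 8 bytes");

    void copyBits(Pair &Dst, const Pair &Src) {
      uint64_t Bits; // the 8-byte memcpys below may become one i64 load/store
      std::memcpy(&Bits, &Src, sizeof Bits);
      std::memcpy(&Dst, &Bits, sizeof Bits);
    }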
@@ -2620,7 +2614,8 @@ private:
// LI only used for this computation.
Value *Placeholder =
new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
- V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset, "insert");
+ V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
+ "insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
@@ -2699,7 +2694,8 @@ private:
DL.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
- V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset, "extract");
+ V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
+ "extract");
}
if (VecTy)
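
A minimal sketch of the arithmetic behind the two one-line fixes above, using invented numbers (the variable names mirror the patch): the offset handed to insertInteger and extractInteger must be measured from the begin offset of the original load or store being rewritten, not as an absolute offset into the alloca.

    #include <cstdint>

    // Sketch with made-up offsets: BeginOffset is where the original access
    // starts in the alloca, NewBeginOffset is where the current partition
    // starts. The value passed to insertInteger/extractInteger is now the
    // difference, i.e. an offset within the access's integer value.
    uint64_t relativeOffset(uint64_t BeginOffset, uint64_t NewBeginOffset) {
      return NewBeginOffset - BeginOffset; // e.g. 24 - 16 = 8, not 24
    }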
@@ -3571,7 +3567,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
assert(Offsets.Splits.empty() &&
"Should not have splits the first time we see an instruction!");
Offsets.S = &S;
- Offsets.Splits.push_back(P.endOffset());
+ Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
}
// Now scan the already split slices, and add a split for any of them which
@@ -3586,13 +3582,14 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
assert(Offsets.S == S && "Found a mismatched slice!");
assert(!Offsets.Splits.empty() &&
"Cannot have an empty set of splits on the second partition!");
- assert(Offsets.Splits.back() == P.beginOffset() &&
+ assert(Offsets.Splits.back() ==
+ P.beginOffset() - Offsets.S->beginOffset() &&
"Previous split does not end where this one begins!");
// Record each split. The last partition's end isn't needed as the size
// of the slice dictates that.
if (S->endOffset() > P.endOffset())
- Offsets.Splits.push_back(P.endOffset());
+ Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
}
}
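
The same relative-offset convention is applied to the pre-splitting bookkeeping above: the split points recorded in Offsets.Splits are now expressed relative to the containing slice rather than as absolute alloca offsets. A small worked example with invented offsets:

    #include <cstdint>
    #include <vector>

    // Invented offsets for illustration: a load slice covering [100, 116) is
    // crossed by a partition that ends at 108. The recorded split is the
    // slice-relative value 8 rather than the absolute alloca offset 108.
    void recordSplitExample() {
      const uint64_t SliceBegin = 100, SliceEnd = 116;
      const uint64_t PartitionEnd = 108;
      std::vector<uint64_t> Splits;
      if (SliceEnd > PartitionEnd)
        Splits.push_back(PartitionEnd - SliceBegin); // pushes 8
    }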
@@ -3705,7 +3702,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
- /*IsSplittable*/ true));
+ /*IsSplittable*/ false));
DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset() << "): " << *PLoad
<< "\n");
@@ -3843,7 +3840,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PStore->getOperandUse(PStore->getPointerOperandIndex()),
- /*IsSplittable*/ true));
+ /*IsSplittable*/ false));
DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset() << "): " << *PStore
<< "\n");
@@ -3879,24 +3876,29 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// Mark the original store as dead now that we've split it up and kill its
- // slice. Note that we leave the original load in place. It may in turn be
- // split up if it is an alloca load for some other alloca, but it may be
- // a normal load. This may introduce redundant loads, but where those can
- // be merged the rest of the optimizer should handle the merging, and this
- // uncovers SSA splits which is more important. In practice, the original
- // loads will almost always be fully split and removed eventually, and the
- // splits will be merged by any trivial CSE, including instcombine.
+ // slice. Note that we leave the original load in place unless this store
+ // was its only use. It may in turn be split up if it is an alloca load
+ // for some other alloca, but it may be a normal load. This may introduce
+ // redundant loads, but where those can be merged the rest of the optimizer
+ // should handle the merging, and this uncovers SSA splits which is more
+ // important. In practice, the original loads will almost always be fully
+ // split and removed eventually, and the splits will be merged by any
+ // trivial CSE, including instcombine.
+ if (LI->hasOneUse()) {
+ assert(*LI->user_begin() == SI && "Single use isn't this store!");
+ DeadInsts.insert(LI);
+ }
DeadInsts.insert(SI);
Offsets.S->kill();
}
- // Now we need to remove the killed slices, sort the newly added slices, and
- // merge the two sorted ranges of slices so that the entire range is sorted
- // properly for us to re-compute the partitions.
+ // Remove the killed slices that have been pre-split.
AS.erase(std::remove_if(AS.begin(), AS.end(), [](const Slice &S) {
return S.isDead();
}), AS.end());
+ // Insert our new slices. This will sort and merge them into the sorted
+ // sequence.
AS.insert(NewSlices);
DEBUG(dbgs() << " Pre-split slices:\n");
@@ -4072,8 +4074,33 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
unsigned NumPartitions = 0;
bool Changed = false;
+ // First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
+ // Now that we have identified any pre-splitting opportunities, mark any
+ // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail
+ // to split these during pre-splitting, we want to force them to be
+ // rewritten into a partition.
+ bool IsSorted = true;
+ for (Slice &S : AS) {
+ if (!S.isSplittable())
+ continue;
+ // FIXME: We currently leave whole-alloca splittable loads and stores. This
+ // used to be the only splittable loads and stores and we need to be
+ // confident that the above handling of splittable loads and stores is
+ // completely sufficient before we forcibly disable the remaining handling.
+ if (S.beginOffset() == 0 &&
+ S.endOffset() >= DL->getTypeAllocSize(AI.getAllocatedType()))
+ continue;
+ if (isa<LoadInst>(S.getUse()->getUser()) ||
+ isa<StoreInst>(S.getUse()->getUser())) {
+ S.makeUnsplittable();
+ IsSorted = false;
+ }
+ }
+ if (!IsSorted)
+ std::sort(AS.begin(), AS.end());
+
// Rewrite each partition.
for (auto &P : AS.partitions()) {
Changed |= rewritePartition(AI, AS, P);
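
As to why the loop above tracks IsSorted and re-sorts: the slice ordering used by AllocaSlices breaks ties at equal begin offsets using splittability, so flipping a slice to unsplittable in place can leave the container out of order. A simplified stand-in for that kind of comparator (not the exact LLVM definition) is sketched below:

    #include <cstdint>

    // Simplified stand-in for the slice ordering assumed here: ties at the
    // same begin offset are broken so unsplittable slices sort first, which
    // is why makeUnsplittable() can perturb an already sorted sequence.
    struct SliceKey {
      uint64_t Begin, End;
      bool Splittable;
    };

    bool operator<(const SliceKey &L, const SliceKey &R) {
      if (L.Begin != R.Begin)
        return L.Begin < R.Begin;
      if (L.Splittable != R.Splittable)
        return !L.Splittable; // unsplittable before splittable
      return L.End < R.End;
    }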