author    Hiroshi Inoue <inouehrs@jp.ibm.com>  2017-11-30 07:44:46 +0000
committer Hiroshi Inoue <inouehrs@jp.ibm.com>  2017-11-30 07:44:46 +0000
commit    422e80aee2baf32d981e96406dfedc6708282e61 (patch)
tree      b45c23cacf072004c4b7d3bd3f7ecf8342e05c5e /llvm/lib/Transforms
parent    a495744d2c3b2726aa4d7de37f863cb6007a6a78 (diff)
[SROA] enable splitting for non-whole-alloca loads and stores
Currently, SROA splits loads and stores only when they access the whole alloca. This patch relaxes that limitation: a load/store may be split as long as every other load or store to the alloca is either disjoint from it or fully contained in it. If no other load or store crosses the boundary of the current one, the existing splitting implementation works as is. Whole-alloca loads and stores satisfy the new condition, so they remain splittable.

Here is a simplified motivating example:

    struct record {
      long long a;
      int b;
      int c;
    };

    int func(struct record r) {
      for (int i = 0; i < r.c; i++)
        r.b++;
      return r.b;
    }

When updating r.b (or r.c), LLVM generates redundant instructions on some platforms (such as x86_64 and ppc64), because r.b and r.c are packed into one 64-bit GPR when the struct is passed as a function argument. With this patch, the example compiles to only a few instructions, with no loop. Without it, SROA introduces an unnecessary loop-carried dependency and the later optimizers cannot eliminate the loop.

Differential Revision: https://reviews.llvm.org/D32998

llvm-svn: 319407
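To make the new condition concrete, here is a minimal standalone sketch (an editor's illustration, not code from the patch), using std::vector<bool> in place of llvm::SmallBitVector. The slice offsets are assumptions modeling the example above on a 64-bit target: the 16-byte argument is spilled with two 8-byte stores, [0,8) and [8,16), and the fields r.b and r.c are accessed at [8,12) and [12,16).

    #include <cstdio>
    #include <vector>

    // A slice is the [Begin, End) byte range of one load or store of the alloca.
    struct Slice {
      unsigned Begin, End;
    };

    int main() {
      const unsigned AllocaSize = 16;
      // Assumed slices for the motivating example: two 8-byte argument stores
      // plus the 4-byte field accesses of r.b and r.c.
      const std::vector<Slice> Slices = {{0, 8}, {8, 16}, {8, 12}, {12, 16}};

      // Any byte offset strictly inside some load or store is a boundary that
      // splitting must not create; only offsets no slice crosses stay true.
      // (The real patch additionally guards offsets beyond AllocaSize.)
      std::vector<bool> SplittableOffset(AllocaSize + 1, true);
      for (const Slice &S : Slices)
        for (unsigned O = S.Begin + 1; O < S.End && O < AllocaSize; ++O)
          SplittableOffset[O] = false;

      // A slice stays splittable iff both of its endpoints survive, i.e. every
      // other slice is disjoint from it or fully contained in it.
      for (const Slice &S : Slices)
        std::printf("[%2u,%2u) %s\n", S.Begin, S.End,
                    SplittableOffset[S.Begin] && SplittableOffset[S.End]
                        ? "stays splittable"
                        : "made unsplittable");
    }

This prints that [0,8) and [8,16) stay splittable while [8,12) and [12,16) are made unsplittable. The unsplittable field accesses force partitions at [8,12) and [12,16), and the still-splittable 8-byte store [8,16) can then be carved at that boundary; under the old whole-alloca-only rule, [8,16) itself would have been forced into a single partition, keeping r.b and r.c glued together.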
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp | 31
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index bd064978b64..d0431d48a43 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -4047,21 +4048,31 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
   // First try to pre-split loads and stores.
   Changed |= presplitLoadsAndStores(AI, AS);
 
-  // Now that we have identified any pre-splitting opportunities, mark any
-  // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail
-  // to split these during pre-splitting, we want to force them to be
-  // rewritten into a partition.
+  // Now that we have identified any pre-splitting opportunities,
+  // mark loads and stores unsplittable except for the following case.
+  // We leave a slice splittable if all other slices are disjoint or fully
+  // included in the slice, such as whole-alloca loads and stores.
+  // If we fail to split these during pre-splitting, we want to force them
+  // to be rewritten into a partition.
   bool IsSorted = true;
+
+  // If a byte boundary is included in any load or store, a slice starting or
+  // ending at the boundary is not splittable.
+  unsigned AllocaSize = DL.getTypeAllocSize(AI.getAllocatedType());
+  SmallBitVector SplittableOffset(AllocaSize+1, true);
+  for (Slice &S : AS)
+    for (unsigned O = S.beginOffset() + 1; O < S.endOffset() && O < AllocaSize;
+         O++)
+      SplittableOffset.reset(O);
+
   for (Slice &S : AS) {
     if (!S.isSplittable())
       continue;
-    // FIXME: We currently leave whole-alloca splittable loads and stores. This
-    // used to be the only splittable loads and stores and we need to be
-    // confident that the above handling of splittable loads and stores is
-    // completely sufficient before we forcibly disable the remaining handling.
-    if (S.beginOffset() == 0 &&
-        S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
+
+    if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
+        (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
       continue;
+
     if (isa<LoadInst>(S.getUse()->getUser()) ||
         isa<StoreInst>(S.getUse()->getUser())) {
       S.makeUnsplittable();
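For the motivating example, the end-to-end payoff looks roughly like the sketch below (an editor's illustration under stated assumptions, not actual compiler output). It assumes little-endian x86_64 with the SysV ABI, where the struct arrives in two GPRs with r.b in the low 32 bits and r.c in the high 32 bits of the second one; once SROA separates the two fields, later passes can collapse the loop into a guarded addition.

    #include <cstdint>
    #include <cstdio>

    // Editor's illustration only: approximately what func computes after
    // optimization once r.b and r.c are no longer fused into one partition.
    int func_after_opts(int64_t /*a*/, uint64_t bc) {
      int b = static_cast<int>(bc);        // r.b: low 32 bits of the packed GPR
      int c = static_cast<int>(bc >> 32);  // r.c: high 32 bits of the packed GPR
      // "for (i = 0; i < c; i++) b++;" collapses to a guarded addition.
      return c > 0 ? b + c : b;
    }

    int main() {
      // r = { .a = 0, .b = 5, .c = 3 }  =>  returns 8
      std::printf("%d\n", func_after_opts(0, (3ull << 32) | 5u));
    }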