summary refs log tree commit diff stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
author Chad Rosier <mcrosier@codeaurora.org> 2015-10-28 14:38:49 +0000
committer Chad Rosier <mcrosier@codeaurora.org> 2015-10-28 14:38:49 +0000
commit 7967614b2b89578c634135c3ea9992851d8b6603 (patch)
tree 81af94a7904f851009ed7960c5214c49045016e6 /llvm/lib/Transforms
parent ef607a20893e32a644e3f56347267ad7e8144f76 (diff)
download bcm5719-llvm-7967614b2b89578c634135c3ea9992851d8b6603.tar.gz
bcm5719-llvm-7967614b2b89578c634135c3ea9992851d8b6603.zip
Reapply: [LIR] Add support for creating memsets from loops with a negative stride.
The simple fix is to prevent forming memcpy from loops with a negative stride.
llvm-svn: 251518
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 56
1 files changed, 32 insertions, 24 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ef9526fd398..ce425f1bf9f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -31,11 +31,6 @@
// void foo(_Complex float *P)
// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
//
-// We should enhance this to handle negative strides through memory.
-// Alternatively (and perhaps better) we could rely on an earlier pass to force
-// forward iteration through memory, which is generally better for cache
-// behavior. Negative strides *do* happen for memset/memcpy loops.
-//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
@@ -124,7 +119,7 @@ private:
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment, Value *SplatValue,
Instruction *TheStore, const SCEVAddRecExpr *Ev,
- const SCEV *BECount);
+ const SCEV *BECount, bool NegStride);
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
@@ -316,25 +311,27 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
unsigned StoreSize = (unsigned)SizeInBits >> 3;
- const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-
- if (!Stride || StoreSize != Stride->getValue()->getValue()) {
- // TODO: Could also handle negative stride here someday, that will require
- // the validity check in mayLoopAccessLocation to be updated though.
- // Enable this to print exact negative strides.
- if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
- dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
- dbgs() << "BB: " << *SI->getParent();
- }
+ const SCEVConstant *ConstStride =
+ dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+ if (!ConstStride)
return false;
- }
+
+ APInt Stride = ConstStride->getValue()->getValue();
+ if (StoreSize != Stride && StoreSize != -Stride)
+ return false;
+
+ bool NegStride = StoreSize == -Stride;
// See if we can optimize just this store in isolation.
if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount))
+ StoredVal, SI, StoreEv, BECount, NegStride))
return true;
+ // TODO: We don't handle negative stride memcpys.
+ if (NegStride)
+ return false;
+
// If the stored value is a strided load in the same loop with the same stride
// then this may be transformable into a memcpy. This kicks in for stuff like
// for (i) A[i] = B[i];
@@ -387,7 +384,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
MSI->getAlignment(), MSI->getValue(), MSI, Ev,
- BECount);
+ BECount, /*NegStride=*/false);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -468,7 +465,7 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
- const SCEV *BECount) {
+ const SCEV *BECount, bool NegStride) {
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
@@ -506,15 +503,27 @@ bool LoopIdiomRecognize::processLoopStridedStore(
SCEVExpander Expander(*SE, DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+ Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
+
+ const SCEV *Start = Ev->getStart();
+ // If we have a negative stride, Start refers to the end of the memory
+ // location we're trying to memset. Therefore, we need to recompute the start
+ // point, which is just Start - BECount*Size.
+ if (NegStride) {
+ const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+ if (StoreSize != 1)
+ Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+ Start = SE->getMinusSCEV(Ev->getStart(), Index);
+ }
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
- Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
- Preheader->getTerminator());
-
+ Value *BasePtr =
+ Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
*AA, TheStore)) {
Expander.clear();
@@ -527,7 +536,6 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS =
OpenPOWER on IntegriCloud