summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-07-11 15:21:55 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-07-11 15:21:55 +0000
commit: e97c71b8fddcbf61fdbfb13b520761d05a646a2c (patch)
tree: 86bb7d1297d31f63030a084d181a6df760cfef4d /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent: a3501d4b81c170d3b0fc7645a6f5357c24e517e7 (diff)
download: bcm5719-llvm-e97c71b8fddcbf61fdbfb13b520761d05a646a2c.tar.gz
bcm5719-llvm-e97c71b8fddcbf61fdbfb13b520761d05a646a2c.zip
LoopVectorize: Vectorize all accesses in address space zero with unit stride
We can vectorize them because, in the case where we wrap in the address space, the unvectorized code would have had to access a pointer value of zero, which is undefined behavior in address space zero according to the LLVM IR semantics. (Thank you, Duncan, for pointing this out to me.) Fixes PR16592. llvm-svn: 186088
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 24
1 files changed, 16 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e6e0f6b6ae4..020eb615714 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3223,11 +3223,12 @@ static bool isInBoundsGep(Value *Ptr) {
/// \brief Check whether the access through \p Ptr has a constant stride.
static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
const Loop *Lp) {
- const Type *PtrTy = Ptr->getType();
- assert(PtrTy->isPointerTy() && "Unexpected non ptr");
+ const Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non ptr");
// Make sure that the pointer does not point to aggregate types.
- if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType()) {
+ const PointerType *PtrTy = cast<PointerType>(Ty);
+ if (PtrTy->getElementType()->isAggregateType()) {
DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr
<< "\n");
return 0;
@@ -3248,11 +3249,16 @@ static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
}
// The address calculation must not wrap. Otherwise, a dependence could be
- // inverted. An inbounds getelementptr that is a AddRec with a unit stride
+ // inverted.
+ // An inbounds getelementptr that is a AddRec with a unit stride
// cannot wrap per definition. The unit stride requirement is checked later.
+ // A getelementptr without an inbounds attribute but with a unit stride would
+ // have to access the pointer value "0" in order to wrap, which is undefined
+ // behavior in address space 0; therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
- if (!IsNoWrapAddRec && !IsInBoundsGEP) {
+ bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+ if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
@@ -3269,7 +3275,7 @@ static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
return 0;
}
- int64_t Size = DL->getTypeAllocSize(PtrTy->getPointerElementType());
+ int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
const APInt &APStepVal = C->getValue()->getValue();
// Huge step value - give up.
@@ -3285,8 +3291,10 @@ static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
return 0;
// If the SCEV could wrap but we have an inbounds gep with a unit stride we
- // know we can't "wrap around the address space".
- if (!IsNoWrapAddRec && IsInBoundsGEP && Stride != 1 && Stride != -1)
+ // know we can't "wrap around the address space". In case of address space
+ // zero we know that this won't happen without triggering undefined behavior.
+ if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+ Stride != 1 && Stride != -1)
return 0;
return Stride;
OpenPOWER on IntegriCloud