summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
author Michael Zolotukhin <mzolotukhin@apple.com> 2015-09-30 21:05:43 +0000
committer Michael Zolotukhin <mzolotukhin@apple.com> 2015-09-30 21:05:43 +0000
commit fc783e91e0c0696ec5b3a990a7ac91bd751e370d (patch)
tree 3b24127aa0f733436e672638b5d9814499269f29 /llvm/lib/Transforms
parent 757908e545e720a13c5391ce2eb399c4026859e2 (diff)
download bcm5719-llvm-fc783e91e0c0696ec5b3a990a7ac91bd751e370d.tar.gz
bcm5719-llvm-fc783e91e0c0696ec5b3a990a7ac91bd751e370d.zip
[SLP] Don't vectorize loads of non-packed types (like i1, i2).
Summary: Given an array of i2 elements, 4 consecutive scalar loads will be lowered to i8-sized loads and thus will access 4 consecutive bytes in memory. If we vectorize these loads into a single <4 x i2> load, it'll access only 1 byte in memory. Hence, we should prohibit vectorization in such cases. PS: Initial patch was proposed by Arnold. Reviewers: aschwaighofer, nadav, hfinkel Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D13277 llvm-svn: 248943
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 19
1 files changed, 18 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fd8818c1ca9..f9dee18af95 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1158,6 +1158,23 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
case Instruction::Load: {
+ // Check that a vectorized load would load the same memory as a scalar
+ // load.
+ // For example, we don't want to vectorize loads that are smaller than 8 bits.
+ // Even though we have a packed struct {<i2, i2, i2, i2>} LLVM treats
+ // loading/storing it as an i8 struct. If we vectorize loads/stores from
+ // such a struct we read/write packed bits disagreeing with the
+ // unvectorized version.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *ScalarTy = VL[0]->getType();
+
+ if (DL.getTypeSizeInBits(ScalarTy) !=
+ DL.getTypeAllocSizeInBits(ScalarTy)) {
+ BS.cancelScheduling(VL);
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+ return;
+ }
// Check if the loads are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
LoadInst *L = cast<LoadInst>(VL[i]);
@@ -1167,7 +1184,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- const DataLayout &DL = F->getParent()->getDataLayout();
+
if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
++NumLoadsWantToChangeOrder;
OpenPOWER on IntegriCloud