[LV] Fix PR26600: avoid out of bounds loads for interleaved access vectorization

Summary: If we don't have the first and last access of an interleaved load group, the first and last wide load in the loop can do an out of bounds access. Even though we discard results from speculative loads, this can cause problems, since it can technically generate page faults (or worse). We now discard interleaved load groups that don't have the first and load in the group. Reviewers: hfinkel, rengolin Subscribers: rengolin, llvm-commits, mzolotukhin, anemet Differential Revision: http://reviews.llvm.org/D17332 llvm-svn: 261331
author: Silviu Baranga <silviu.baranga@arm.com> 2016-02-19 15:46:10 +0000
committer: Silviu Baranga <silviu.baranga@arm.com> 2016-02-19 15:46:10 +0000
commit: ad1dafb2c3ba44beab20619e788ed64f5da80513 (patch)
tree: 2d422cb56a86c0b09b7e550afe6adb439b443170
parent: 2d26fe7aa639e9aa84097bf5aadc83dd4a780e35 (diff)
download: bcm5719-llvm-ad1dafb2c3ba44beab20619e788ed64f5da80513.tar.gz
bcm5719-llvm-ad1dafb2c3ba44beab20619e788ed64f5da80513.zip
3 files changed, 19 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4648f264b5f..e8af7051a32 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4719,6 +4719,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
 
   // Holds all interleaved store groups temporarily.
   SmallSetVector<InterleaveGroup *, 4> StoreGroups;
+  // Holds all interleaved load groups temporarily.
+  SmallSetVector<InterleaveGroup *, 4> LoadGroups;
 
   // Search the load-load/write-write pair B-A in bottom-up order and try to
   // insert B into the interleave group of A according to 3 rules:
@@ -4746,6 +4748,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
 
     if (A->mayWriteToMemory())
       StoreGroups.insert(Group);
+    else
+      LoadGroups.insert(Group);
 
     for (auto II = std::next(I); II != E; ++II) {
       Instruction *B = II->first;
@@ -4793,6 +4797,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
   for (InterleaveGroup *Group : StoreGroups)
     if (Group->getNumMembers() != Group->getFactor())
       releaseGroup(Group);
+
+  // Remove interleaved load groups that don't have the first and last member.
+  // This guarantees that we won't do speculative out of bounds loads.
+  for (InterleaveGroup *Group : LoadGroups)
+    if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1))
+      releaseGroup(Group);
 }
 
 LoopVectorizationCostModel::VectorizationFactor
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
index 0cb84552024..f6f2609e8fb 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
@@ -16,9 +16,15 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl nsw i64 %indvars.iv, 1
+  %odd.idx = add nsw i64 %0, 1
+
   %arrayidx = getelementptr inbounds double, double* %b, i64 %0
+  %arrayidx.odd = getelementptr inbounds double, double* %b, i64 %odd.idx
+
   %1 = load double, double* %arrayidx, align 8
-  %add = fadd double %1, 1.000000e+00
+  %2 = load double, double* %arrayidx.odd, align 8
+
+  %add = fadd double %1, %2
   %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
   store double %add, double* %arrayidx2, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index d7237a5c27d..54ce3e29293 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -292,10 +292,8 @@ for.body:                                         ; preds = %for.body, %entry
 ; }
 
 ; CHECK-LABEL: @even_load(
-; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
-; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
+; CHECK-NOT: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 
 define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
 entry:
author	Silviu Baranga <silviu.baranga@arm.com>	2016-02-19 15:46:10 +0000
committer	Silviu Baranga <silviu.baranga@arm.com>	2016-02-19 15:46:10 +0000
commit	ad1dafb2c3ba44beab20619e788ed64f5da80513 (patch)
tree	2d422cb56a86c0b09b7e550afe6adb439b443170
parent	2d26fe7aa639e9aa84097bf5aadc83dd4a780e35 (diff)
download	bcm5719-llvm-ad1dafb2c3ba44beab20619e788ed64f5da80513.tar.gz bcm5719-llvm-ad1dafb2c3ba44beab20619e788ed64f5da80513.zip