author     Matthew Simpson <mssimpso@codeaurora.org>   2017-03-07 18:47:30 +0000
committer  Matthew Simpson <mssimpso@codeaurora.org>   2017-03-07 18:47:30 +0000
commit     c86b2134c79ed9791fc87269b23c89fd1669dfcd (patch)
tree       d94daa3eabf045b5c25b1c6113693ba7e8d04b90
parent     c08a79fbf28cba5b86c1ae11425d347e09b96c0d (diff)
[LV] Consider users that are memory accesses in uniforms expansion step
When expanding the set of uniform instructions beyond the seed instructions
(e.g., consecutive pointers), we mark a new instruction uniform if all its
loop-varying users are uniform. We should also allow users that are consecutive
or interleaved memory accesses. This fixes cases where we have an instruction
that is used as the pointer operand of a consecutive access but also used by a
non-memory instruction that later becomes uniform as part of the expansion.

llvm-svn: 297179
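The rule described above is easy to model as a small worklist algorithm. What follows is a minimal, self-contained C++ sketch of that rule, not the LLVM implementation: the Inst struct, its field names, and expandUniforms are invented for illustration, and the boolean fields merely stand in for the TheLoop->contains(J), getPointerOperand(J), and isUniformDecision(J, VF) checks that appear in the patch below.

    #include <iostream>
    #include <string>
    #include <unordered_set>
    #include <vector>

    // Toy stand-in for llvm::Instruction; none of these names are LLVM APIs.
    struct Inst {
      std::string Name;
      bool InLoop = true;                   // stands in for TheLoop->contains(J)
      bool UniformMemAccess = false;        // stands in for isUniformDecision(J, VF)
      const Inst *PointerOperand = nullptr; // stands in for getPointerOperand(J)
      std::vector<const Inst *> Users;
      std::vector<const Inst *> Operands;
    };

    // Expand the uniform set: an operand OI of a uniform instruction becomes
    // uniform when every in-loop user is already uniform or is a widened
    // consecutive/interleaved access that uses OI as its pointer operand.
    void expandUniforms(std::vector<const Inst *> Pending,
                        std::unordered_set<const Inst *> &Worklist) {
      Worklist.insert(Pending.begin(), Pending.end());
      while (!Pending.empty()) {
        const Inst *I = Pending.back();
        Pending.pop_back();
        for (const Inst *OI : I->Operands) {
          if (Worklist.count(OI))
            continue;
          bool AllUsersOK = true;
          for (const Inst *J : OI->Users)
            if (J->InLoop && !Worklist.count(J) &&
                !(J->PointerOperand == OI && J->UniformMemAccess))
              AllUsersOK = false;
          if (AllUsersOK) {
            Worklist.insert(OI);
            Pending.push_back(OI);
            std::cout << "Found uniform instruction: " << OI->Name << "\n";
          }
        }
      }
    }

    int main() {
      // Def-use shape of the new test below: %tmp1 (bitcast) is the pointer
      // operand of a load and also an operand of %tmp2 (getelementptr).
      Inst Tmp1{"%tmp1 = bitcast i64* %tmp0 to i8*"};
      Inst Tmp2{"%tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3"};
      Inst Load{"%tmp4 = load i8, i8* %tmp1, align 1"};
      Load.UniformMemAccess = true;
      Load.PointerOperand = &Tmp1;
      Tmp1.Users = {&Tmp2, &Load};
      Tmp2.Operands = {&Tmp1};

      std::unordered_set<const Inst *> Worklist;
      expandUniforms({&Tmp2}, Worklist); // seed with the already-uniform GEP
      return 0;
    }

Seeding this toy model with a uniform getelementptr whose operand also feeds a load (the def-use shape of the new test case) makes the sketch report that operand as uniform, which is the behavior the real patch restores in the cost model.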
-rw-r--r--   llvm/lib/Transforms/Vectorize/LoopVectorize.cpp                  4
-rw-r--r--   llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll  50
2 files changed, 53 insertions, 1 deletion
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 763ce083837..836a38d9813 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5675,7 +5675,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
         continue;
       auto *OI = cast<Instruction>(OV);
       if (all_of(OI->users(), [&](User *U) -> bool {
-            return isOutOfScope(U) || Worklist.count(cast<Instruction>(U));
+            auto *J = cast<Instruction>(U);
+            return !TheLoop->contains(J) || Worklist.count(J) ||
+                   (OI == getPointerOperand(J) && isUniformDecision(J, VF));
           })) {
         Worklist.insert(OI);
         DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n");
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index 88b2aa36b08..125829090c3 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -438,3 +438,53 @@ for.end:
%tmp5 = phi i32 [ %tmp2, %for.body ]
ret i32 %tmp5
}
+
+; INTER-LABEL: bitcast_pointer_operand
+;
+; Check that a pointer operand having a user other than a memory access is
+; recognized as uniform after vectorization. In this test case, %tmp1 is a
+; bitcast that is used by a load and a getelementptr instruction (%tmp2). Once
+; %tmp2 is marked uniform, %tmp1 should be marked uniform as well.
+;
+; INTER: LV: Found uniform instruction: %cond = icmp slt i64 %i.next, %n
+; INTER-NEXT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3
+; INTER-NEXT: LV: Found uniform instruction: %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i
+; INTER-NEXT: LV: Found uniform instruction: %tmp1 = bitcast i64* %tmp0 to i8*
+; INTER-NEXT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i
+; INTER-NEXT: LV: Found uniform instruction: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+; INTER-NEXT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 1
+; INTER: vector.body:
+; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* %A, i64 [[INDEX]]
+; INTER-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <32 x i8>*
+; INTER-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 1
+; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
+; INTER-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> <i32 3, i32 11, i32 19, i32 27>
+; INTER-NEXT: [[TMP6:%.*]] = xor <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]]
+; INTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* %B, i64 [[INDEX]]
+; INTER-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
+; INTER-NEXT: store <4 x i8> [[TMP6]], <4 x i8>* [[TMP8]], align 1
+; INTER-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @bitcast_pointer_operand(i64* %A, i8* %B, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i
+ %tmp1 = bitcast i64* %tmp0 to i8*
+ %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3
+ %tmp3 = load i8, i8* %tmp2, align 1
+ %tmp4 = load i8, i8* %tmp1, align 1
+ %tmp5 = xor i8 %tmp3, %tmp4
+ %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i
+ store i8 %tmp5, i8* %tmp6
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}