diff options
| author | Matthew Simpson <mssimpso@codeaurora.org> | 2016-09-02 16:19:22 +0000 | 
|---|---|---|
| committer | Matthew Simpson <mssimpso@codeaurora.org> | 2016-09-02 16:19:22 +0000 | 
| commit | b65c230eab30b3fa5586d1459d115300a219e781 (patch) | |
| tree | 55db48d546713712f2cfa03dd1490edcc0ad639a /llvm | |
| parent | f26ef0a27afc42450e5af19135a32755b9f47b1d (diff) | |
| download | bcm5719-llvm-b65c230eab30b3fa5586d1459d115300a219e781.tar.gz bcm5719-llvm-b65c230eab30b3fa5586d1459d115300a219e781.zip  | |
[LV] Ensure reverse interleaved group GEPs remain uniform

For uniform instructions, we're only required to generate a scalar value for
the first vector lane of each unroll iteration. Thus, if we have a reverse
interleaved group, computing the member index off the scalar GEP corresponding
to the last vector lane of its pointer operand technically makes the GEP
non-uniform. We should compute the member index off the first scalar GEP
instead.

I've added the updated member index computation to the existing reverse
interleaved group test.

llvm-svn: 280497
Diffstat (limited to 'llvm')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 12 |
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll | 10 |

2 files changed, 19 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9add69d0539..1b0586e56e3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2600,8 +2600,18 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {    setDebugLocFromInst(Builder, Ptr);    SmallVector<Value *, 2> NewPtrs;    unsigned Index = Group->getIndex(Instr); + +  // If the group is reverse, adjust the index to refer to the last vector lane +  // instead of the first. We adjust the index from the first vector lane, +  // rather than directly getting the pointer for lane VF - 1, because the +  // pointer operand of the interleaved access is supposed to be uniform. For +  // uniform instructions, we're only required to generate a value for the +  // first vector lane in each unroll iteration. +  if (Group->isReverse()) +    Index += (VF - 1) * Group->getFactor(); +    for (unsigned Part = 0; Part < UF; Part++) { -    Value *NewPtr = getScalarValue(Ptr, Part, Group->isReverse() ? VF - 1 : 0); +    Value *NewPtr = getScalarValue(Ptr, Part, 0);      // Notice current instruction could be any index. Need to adjust the address      // to the member of index 0. 
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 34998782aa8..d84dc42bdf5 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -244,17 +244,23 @@ for.body:                                         ; preds = %for.body, %entry  ; }  ; CHECK-LABEL: @test_reversed_load2_store2( -; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4 +; CHECK: %[[G0:.+]] = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %offset.idx, i32 0 +; CHECK: %[[G1:.+]] = getelementptr i32, i32* %[[G0]], i64 -6 +; CHECK: %[[B0:.+]] = bitcast i32* %[[G1]] to <8 x i32>* +; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 4  ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>  ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>  ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>  ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>  ; CHECK: add nsw <4 x i32>  ; CHECK: sub nsw <4 x i32> +; CHECK: %[[G2:.+]] = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %offset.idx, i32 1 +; CHECK: %[[G3:.+]] = getelementptr i32, i32* %[[G2]], i64 -7 +; CHECK: %[[B1:.+]] = bitcast i32* %[[G3]] to <8 x i32>*  ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>  ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>  ; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> -; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4 +; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %[[B1]], align 4  %struct.ST2 = type { i32, i32 }  | 

