[LV] Generate both scalar and vector integer induction variables

This patch enables the vectorizer to generate both scalar and vector versions of an integer induction variable for a given loop. Previously, we only generated a scalar induction variable if we knew all its users were going to be scalar. Otherwise, we generated a vector induction variable. In the case of a loop with both scalar and vector users of the induction variable, we would generate the vector induction variable and extract scalar values from it for the scalar users. With this patch, we now generate both versions of the induction variable when there are both scalar and vector users and select which version to use based on whether the user is scalar or vector.

Differential Revision: https://reviews.llvm.org/D22869

llvm-svn: 277474
author: Matthew Simpson <mssimpso@codeaurora.org> 2016-08-02 15:25:16 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> 2016-08-02 15:25:16 +0000
commit: 18d88983179a300adcae41c835fbcf7990c91b2d (patch)
tree: 481776138dad13c03e9e14f6a4463028fdfaf486 /llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
parent: 7a7004a9993d3715e741aaafecf90ad9d5af7cf9 (diff)
download: bcm5719-llvm-18d88983179a300adcae41c835fbcf7990c91b2d.tar.gz
bcm5719-llvm-18d88983179a300adcae41c835fbcf7990c91b2d.zip
1 files changed, 34 insertions, 32 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index 464bbb9d4f9..4b9e996d408 100755
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -19,54 +19,56 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ 
 ; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [ 
+; CHECK-NEXT:    [[SHL:%.*]] = shl i64 %index, 1
+; CHECK-NEXT:    %offset.idx = add i64 [[SHL]], 8
+; CHECK-NEXT:    [[IND00:%.*]] = add i64 %offset.idx, 0
+; CHECK-NEXT:    [[IND02:%.*]] = add i64 %offset.idx, 2
+; CHECK-NEXT:    [[IND04:%.*]] = add i64 %offset.idx, 4
+; CHECK-NEXT:    [[IND06:%.*]] = add i64 %offset.idx, 6
+; CHECK-NEXT:    [[IND08:%.*]] = add i64 %offset.idx, 8
+; CHECK-NEXT:    [[IND10:%.*]] = add i64 %offset.idx, 10
+; CHECK-NEXT:    [[IND12:%.*]] = add i64 %offset.idx, 12
+; CHECK-NEXT:    [[IND14:%.*]] = add i64 %offset.idx, 14
+; CHECK-NEXT:    [[IND16:%.*]] = add i64 %offset.idx, 16
+; CHECK-NEXT:    [[IND18:%.*]] = add i64 %offset.idx, 18
+; CHECK-NEXT:    [[IND20:%.*]] = add i64 %offset.idx, 20
+; CHECK-NEXT:    [[IND22:%.*]] = add i64 %offset.idx, 22
+; CHECK-NEXT:    [[IND24:%.*]] = add i64 %offset.idx, 24
+; CHECK-NEXT:    [[IND26:%.*]] = add i64 %offset.idx, 26
+; CHECK-NEXT:    [[IND28:%.*]] = add i64 %offset.idx, 28
+; CHECK-NEXT:    [[IND30:%.*]] = add i64 %offset.idx, 30
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND00]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND02]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
-; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 2
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND04]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 3
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND06]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 4
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP23]]
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND08]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 5
-; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP26]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND10]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
-; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 6
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP29]]
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND12]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
-; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 7
-; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP32]]
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND14]]
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
-; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 8
-; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP35]]
+; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND16]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
-; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 9
-; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP38]]
+; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND18]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
-; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 10
-; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP41]]
+; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND20]]
 ; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
-; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 11
-; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP44]]
+; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND22]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
-; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 12
-; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP47]]
+; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND24]]
 ; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
-; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 13
-; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP50]]
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]]
 ; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
-; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 14
-; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP53]]
+; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
-; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 15
-; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP56]]
+; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
 ; CHECK-NEXT:    [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 0
author	Matthew Simpson <mssimpso@codeaurora.org>	2016-08-02 15:25:16 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	2016-08-02 15:25:16 +0000
commit	18d88983179a300adcae41c835fbcf7990c91b2d (patch)
tree	481776138dad13c03e9e14f6a4463028fdfaf486 /llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
parent	7a7004a9993d3715e741aaafecf90ad9d5af7cf9 (diff)
download	bcm5719-llvm-18d88983179a300adcae41c835fbcf7990c91b2d.tar.gz bcm5719-llvm-18d88983179a300adcae41c835fbcf7990c91b2d.zip