summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
diff options
context:
space:
mode:
author Matthew Simpson <mssimpso@codeaurora.org> 2016-08-02 15:25:16 +0000
committer Matthew Simpson <mssimpso@codeaurora.org> 2016-08-02 15:25:16 +0000
commit 18d88983179a300adcae41c835fbcf7990c91b2d (patch)
tree 481776138dad13c03e9e14f6a4463028fdfaf486 /llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
parent 7a7004a9993d3715e741aaafecf90ad9d5af7cf9 (diff)
download bcm5719-llvm-18d88983179a300adcae41c835fbcf7990c91b2d.tar.gz
bcm5719-llvm-18d88983179a300adcae41c835fbcf7990c91b2d.zip
[LV] Generate both scalar and vector integer induction variables
This patch enables the vectorizer to generate both scalar and vector versions of an integer induction variable for a given loop. Previously, we only generated a scalar induction variable if we knew all its users were going to be scalar. Otherwise, we generated a vector induction variable. In the case of a loop with both scalar and vector users of the induction variable, we would generate the vector induction variable and extract scalar values from it for the scalar users. With this patch, we now generate both versions of the induction variable when there are both scalar and vector users and select which version to use based on whether the user is scalar or vector.

Differential Revision: https://reviews.llvm.org/D22869

llvm-svn: 277474
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll')
-rwxr-xr-x llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll 66
1 file changed, 34 insertions, 32 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index 464bbb9d4f9..4b9e996d408 100755
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -19,54 +19,56 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 %index, 1
+; CHECK-NEXT: %offset.idx = add i64 [[SHL]], 8
+; CHECK-NEXT: [[IND00:%.*]] = add i64 %offset.idx, 0
+; CHECK-NEXT: [[IND02:%.*]] = add i64 %offset.idx, 2
+; CHECK-NEXT: [[IND04:%.*]] = add i64 %offset.idx, 4
+; CHECK-NEXT: [[IND06:%.*]] = add i64 %offset.idx, 6
+; CHECK-NEXT: [[IND08:%.*]] = add i64 %offset.idx, 8
+; CHECK-NEXT: [[IND10:%.*]] = add i64 %offset.idx, 10
+; CHECK-NEXT: [[IND12:%.*]] = add i64 %offset.idx, 12
+; CHECK-NEXT: [[IND14:%.*]] = add i64 %offset.idx, 14
+; CHECK-NEXT: [[IND16:%.*]] = add i64 %offset.idx, 16
+; CHECK-NEXT: [[IND18:%.*]] = add i64 %offset.idx, 18
+; CHECK-NEXT: [[IND20:%.*]] = add i64 %offset.idx, 20
+; CHECK-NEXT: [[IND22:%.*]] = add i64 %offset.idx, 22
+; CHECK-NEXT: [[IND24:%.*]] = add i64 %offset.idx, 24
+; CHECK-NEXT: [[IND26:%.*]] = add i64 %offset.idx, 26
+; CHECK-NEXT: [[IND28:%.*]] = add i64 %offset.idx, 28
+; CHECK-NEXT: [[IND30:%.*]] = add i64 %offset.idx, 30
; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND00]]
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 1
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND02]]
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 2
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND04]]
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 3
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP20]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND06]]
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 4
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP23]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND08]]
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 5
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP26]]
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND10]]
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 6
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP29]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND12]]
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 7
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP32]]
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND14]]
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 8
-; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP35]]
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND16]]
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 9
-; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP38]]
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND18]]
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 10
-; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP41]]
+; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND20]]
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 11
-; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP44]]
+; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND22]]
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 12
-; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP47]]
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND24]]
; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 13
-; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP50]]
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]]
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
-; CHECK-NEXT: [[TMP53:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 14
-; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP53]]
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]]
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 15
-; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP56]]
+; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]]
; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 0
OpenPOWER on IntegriCloud