summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
diff options
context:
space:
mode:
authorMatthew Simpson <mssimpso@codeaurora.org>2016-08-29 20:14:04 +0000
committerMatthew Simpson <mssimpso@codeaurora.org>2016-08-29 20:14:04 +0000
commitdf19502b16b59fd6ccfda2ee4a3afa99af31682e (patch)
tree084d24f4a597b9a8eb14c50daa75a218ab0e0bea /llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
parent54695a339f5d4f336865a564ab539ff1b5743223 (diff)
downloadbcm5719-llvm-df19502b16b59fd6ccfda2ee4a3afa99af31682e.tar.gz
bcm5719-llvm-df19502b16b59fd6ccfda2ee4a3afa99af31682e.zip
[LV] Move insertelement sequence after scalar definitions
After r279649 when getting a vector value from VectorLoopValueMap, we create an insertelement sequence on-demand if the value has been scalarized instead of vectorized. We previously inserted this insertelement sequence before the value's first vector user. However, this insert location is problematic if that user is the phi node of a first-order recurrence. With this patch, we move the insertelement sequence after the last scalar instruction we created when scalarizing the value. Thus, the value's vector definition in the new loop will immediately follow its scalar definitions. This should fix PR30183. Reference: https://llvm.org/bugs/show_bug.cgi?id=30183 llvm-svn: 280001
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll')
-rwxr-xr-xllvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll32
1 files changed, 16 insertions, 16 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index 5f165c52efc..ec67e632efb 100755
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -54,6 +54,22 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]]
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]]
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]]
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
+; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
+; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
+; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
+; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
+; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
+; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
+; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0
; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP12]], i64 [[TMP61]], i64 0
@@ -87,22 +103,6 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP54]], i64 [[TMP117]], i64 0
; CHECK-NEXT: [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15
; CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP57]], i64 [[TMP121]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
-; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
-; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
-; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
-; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
-; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
-; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
-; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
-; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
-; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
-; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
-; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
-; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
OpenPOWER on IntegriCloud