summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2017-08-07 15:25:49 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2017-08-07 15:25:49 +0000
commit9581b42589d72d1c931c4bcbc0101df7c507e27e (patch)
tree9dc9c13fffbf8b481b51046bd59b054dd22effd9 /llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
parentfaeac6b15edb504985eb66ecca09b681ee48aa53 (diff)
downloadbcm5719-llvm-9581b42589d72d1c931c4bcbc0101df7c507e27e.tar.gz
bcm5719-llvm-9581b42589d72d1c931c4bcbc0101df7c507e27e.zip
[SLP] General improvements of SLP vectorization process.
Patch tries to improve two-pass vectorization analysis, existing in SLP vectorizer. What it does: 1. Defines key nodes, that are the vectorization roots. Previously vectorization started if StoreInst or ReturnInst is found. For now, the vectorization started for all Instructions with no users and void types (Terminators, StoreInst) + CallInsts. 2. CmpInsts, InsertElementInsts and InsertValueInsts are stored in the array. This array is processed only after the vectorization of the first-after-these instructions key node is finished. Vectorization goes in reverse order to try to vectorize as much code as possible. Reviewers: mzolotukhin, Ayal, mkuper, gilr, hfinkel, RKSimon Subscribers: ashahid, anemet, RKSimon, mssimpso, llvm-commits Differential Revision: https://reviews.llvm.org/D29826 llvm-svn: 310260
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll')
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll40
1 files changed, 23 insertions, 17 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 9e4f503155e..46386e8b63e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -303,24 +303,30 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
-; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
-; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
-; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B3]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
-; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
-; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
-; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 2
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
-; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP10]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
+; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP17]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
+; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0
+; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP19]], i32 2
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1
+; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_no_users(
OpenPOWER on IntegriCloud