summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-04-03 17:14:47 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-04-03 17:14:47 +0000
commit428e9d9d878441c010daf6b62399d1df69bc9433 (patch)
tree94167e908a09a4c2f901b0fe07f2c556a7857f00 /llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
parentbe1e2621905b3d61032065caeb2d6ae7e1e3fb54 (diff)
downloadbcm5719-llvm-428e9d9d878441c010daf6b62399d1df69bc9433.tar.gz
bcm5719-llvm-428e9d9d878441c010daf6b62399d1df69bc9433.zip
[SLP] Fix PR36481: vectorize reassociated instructions.
Summary: If the load/extractelement/extractvalue instructions are not originally consecutive, the SLP vectorizer is unable to vectorize them. Patch allows reordering of such instructions. Patch does not support reordering of the repeated instruction, this must be handled in the separate patch. Reviewers: RKSimon, spatel, hfinkel, mkuper, Ayal, ashahid Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D43776 llvm-svn: 329085
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll')
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll25
1 files changed, 10 insertions, 15 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
index ee2875035c9..8928a0ca7e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -11,21 +11,16 @@
define i32 @fn1() {
; CHECK-LABEL: @fn1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 0), align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1) to <2 x i32>*), align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP2]], i32 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 8, i32 3
-; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP12]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
-; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
+; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[REORDER_SHUFFLE]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 8, i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP6]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
; CHECK-NEXT: ret i32 0
;
entry:
OpenPOWER on IntegriCloud