diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2018-04-03 17:14:47 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-04-03 17:14:47 +0000 |
commit | 428e9d9d878441c010daf6b62399d1df69bc9433 (patch) | |
tree | 94167e908a09a4c2f901b0fe07f2c556a7857f00 /llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll | |
parent | be1e2621905b3d61032065caeb2d6ae7e1e3fb54 (diff) | |
download | bcm5719-llvm-428e9d9d878441c010daf6b62399d1df69bc9433.tar.gz bcm5719-llvm-428e9d9d878441c010daf6b62399d1df69bc9433.zip |
[SLP] Fix PR36481: vectorize reassociated instructions.
Summary:
If the load/extractelement/extractvalue instructions are not originally
consecutive, the SLP vectorizer is unable to vectorize them. Patch
allows reordering of such instructions.
Patch does not support reordering of the repeated instruction, this must
be handled in the separate patch.
Reviewers: RKSimon, spatel, hfinkel, mkuper, Ayal, ashahid
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D43776
llvm-svn: 329085
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll | 25 |
1 files changed, 10 insertions, 15 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index ee2875035c9..8928a0ca7e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -11,21 +11,16 @@ define i32 @fn1() { ; CHECK-LABEL: @fn1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 0), align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1) to <2 x i32>*), align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 8, i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP12]], <4 x i32> <i32 6, i32 0, i32 0, i32 0> -; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 +; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[REORDER_SHUFFLE]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 8, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP6]], <4 x i32> <i32 6, i32 0, i32 0, i32 0> +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; entry: |